Compare commits
46 Commits
f443934ee4
...
c
| Author | SHA1 | Date | |
|---|---|---|---|
| de3f5c9677 | |||
| ae2f1c3156 | |||
| 212459a8e4 | |||
| b700dab484 | |||
| 57d79a2759 | |||
| 1c845ba51e | |||
| d64329214d | |||
| 7d6b7da157 | |||
| df9a56af53 | |||
| fd34330081 | |||
| 9b705dd8f8 | |||
| c6484e847e | |||
| fab2559980 | |||
| 9c2df9d674 | |||
| bd0a282344 | |||
| aa6235de5a | |||
| 1c2cf157d2 | |||
| e895cdc9de | |||
|
|
579e67a3db | ||
|
|
ebb047c7b5 | ||
| bb3e7b2989 | |||
| 6ccd9e9fa1 | |||
| 71c026ccf3 | |||
| 4805cc772d | |||
| 3bbeaab0c3 | |||
|
|
a3d8835074 | ||
|
|
947ecb2a2b | ||
| 25c68530ba | |||
|
|
2f507a7546 | ||
|
|
7dc47d310d | ||
| d819f9ca4d | |||
| 9009107a64 | |||
| f00d6661c0 | |||
| 8b6a243a0d | |||
| 7622360a0e | |||
|
|
980d3b45e1 | ||
|
|
7ca136870d | ||
| 09dd9e24c0 | |||
| 14ce3d4e1d | |||
|
|
757e6da2b2 | ||
| 84e0cc54d2 | |||
|
|
6f727dbe57 | ||
|
|
6cedf859db | ||
|
|
6c5d410bdc | ||
|
|
251d69c4ff | ||
|
|
40cd68db3d |
6
.gitignore
vendored
6
.gitignore
vendored
@@ -22,3 +22,9 @@ c/bin
|
|||||||
/.venv
|
/.venv
|
||||||
|
|
||||||
**/build/
|
**/build/
|
||||||
|
|
||||||
|
ros-control-py/install
|
||||||
|
ros-control-py/log
|
||||||
|
scripts/boot/modem_network_info.json
|
||||||
|
|
||||||
|
logs/
|
||||||
5
Makefile
5
Makefile
@@ -43,6 +43,7 @@ TARGETS := \
|
|||||||
CAMERA_VIDEO_SENDER := $(BIN_DIR)/camera_video_sender
|
CAMERA_VIDEO_SENDER := $(BIN_DIR)/camera_video_sender
|
||||||
FFMPEG_PIPELINE_COMMON_SRCS := \
|
FFMPEG_PIPELINE_COMMON_SRCS := \
|
||||||
$(SRC_DIR)/video_pipeline.c \
|
$(SRC_DIR)/video_pipeline.c \
|
||||||
|
$(SRC_DIR)/gps_buffer.c \
|
||||||
$(SRC_DIR)/omni_common.c \
|
$(SRC_DIR)/omni_common.c \
|
||||||
$(SRC_DIR)/protocol.c \
|
$(SRC_DIR)/protocol.c \
|
||||||
$(SRC_DIR)/latencylog.c \
|
$(SRC_DIR)/latencylog.c \
|
||||||
@@ -90,12 +91,12 @@ $(BIN_DIR)/kcpping: $(CMD_DIR)/kcpping.c $(COMMON_SRCS) | $(BIN_DIR)
|
|||||||
$(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS)
|
$(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
$(CAMERA_VIDEO_SENDER): $(CAMERA_VIDEO_SENDER_SRCS) | $(BIN_DIR)
|
$(CAMERA_VIDEO_SENDER): $(CAMERA_VIDEO_SENDER_SRCS) | $(BIN_DIR)
|
||||||
$(CC) $(CFLAGS) $(CPPFLAGS) $$(pkg-config --cflags libavformat libavcodec libavutil libswscale) -o $@ $^ $(LDFLAGS) $$(pkg-config --libs libavformat libavcodec libavutil libswscale)
|
$(CC) $(CFLAGS) $(CPPFLAGS) $$(pkg-config --cflags libavformat libavcodec libavutil libswscale) -o $@ $^ $(LDFLAGS) $$(pkg-config --libs libavformat libavcodec libavutil libswscale) -lm
|
||||||
|
|
||||||
camera_video_sender: $(CAMERA_VIDEO_SENDER)
|
camera_video_sender: $(CAMERA_VIDEO_SENDER)
|
||||||
|
|
||||||
$(B_SIDE_OMNID): $(B_SIDE_OMNID_SRCS) | $(BIN_DIR)
|
$(B_SIDE_OMNID): $(B_SIDE_OMNID_SRCS) | $(BIN_DIR)
|
||||||
$(CC) $(CFLAGS) $(CPPFLAGS) $$(pkg-config --cflags libavformat libavcodec libavutil libswscale) -o $@ $^ $(LDFLAGS) $$(pkg-config --libs libavformat libavcodec libavutil libswscale)
|
$(CC) $(CFLAGS) $(CPPFLAGS) $$(pkg-config --cflags libavformat libavcodec libavutil libswscale) -o $@ $^ $(LDFLAGS) $$(pkg-config --libs libavformat libavcodec libavutil libswscale) -lm
|
||||||
|
|
||||||
b_side_omnid: $(B_SIDE_OMNID)
|
b_side_omnid: $(B_SIDE_OMNID)
|
||||||
|
|
||||||
|
|||||||
@@ -34,10 +34,13 @@ Server `D` runs the KCP hub on `0.0.0.0:10909`:
|
|||||||
```bash
|
```bash
|
||||||
./bin/kcpserver -listen 0.0.0.0:10909 \
|
./bin/kcpserver -listen 0.0.0.0:10909 \
|
||||||
-telemetry-peer peer-a-telemetry \
|
-telemetry-peer peer-a-telemetry \
|
||||||
-kcp-ts-debug-log logs/d-kcp-ts.jsonl \
|
-telemetry-interval 1000ms \
|
||||||
-kcp-session-stats-log logs/d-kcp-stats.jsonl
|
-kcp-session-stats-log logs/d-kcp-stats.jsonl \
|
||||||
|
-kcp-session-stats-interval 1000ms
|
||||||
```
|
```
|
||||||
|
|
||||||
|
For multi-hour runs, keep `-latency-log` and `-kcp-ts-debug-log` off unless you are collecting a short repro trace.
|
||||||
|
|
||||||
Relay `C` runs a raw UDP forwarder to `D`:
|
Relay `C` runs a raw UDP forwarder to `D`:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -17,10 +17,17 @@ int main(void) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (video_pipeline_run(&config, &stats, NULL) != 0) {
|
for (;;) {
|
||||||
perror("video_pipeline_run");
|
int rc = video_pipeline_run(&config, &stats, NULL);
|
||||||
video_pipeline_stats_destroy(&stats);
|
|
||||||
return 1;
|
if (rc == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (rc != VIDEO_PIPELINE_RUN_RETRY_IMMEDIATE) {
|
||||||
|
perror("video_pipeline_run");
|
||||||
|
video_pipeline_stats_destroy(&stats);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
video_pipeline_stats_destroy(&stats);
|
video_pipeline_stats_destroy(&stats);
|
||||||
|
|||||||
16
include/gps_buffer.h
Normal file
16
include/gps_buffer.h
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#ifndef GPS_BUFFER_H
|
||||||
|
#define GPS_BUFFER_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
typedef struct gps_video_sample {
|
||||||
|
double latitude;
|
||||||
|
double longitude;
|
||||||
|
} gps_video_sample_t;
|
||||||
|
|
||||||
|
gps_video_sample_t get_latest_gps_for_video(void);
|
||||||
|
|
||||||
|
|
||||||
|
int gps_buffer_init(const char* host);
|
||||||
|
void gps_buffer_cleanup(void);
|
||||||
|
#endif
|
||||||
@@ -24,8 +24,12 @@ typedef struct kcp_session_stats_record {
|
|||||||
uint32_t rto_ms;
|
uint32_t rto_ms;
|
||||||
int has_srtt_ms;
|
int has_srtt_ms;
|
||||||
int32_t srtt_ms;
|
int32_t srtt_ms;
|
||||||
|
int has_min_srtt_ms;
|
||||||
|
int32_t min_srtt_ms;
|
||||||
int has_srttvar_ms;
|
int has_srttvar_ms;
|
||||||
int32_t srttvar_ms;
|
int32_t srttvar_ms;
|
||||||
|
int has_last_feedback_age_ms;
|
||||||
|
uint32_t last_feedback_age_ms;
|
||||||
int has_snd_wnd;
|
int has_snd_wnd;
|
||||||
uint32_t snd_wnd;
|
uint32_t snd_wnd;
|
||||||
int has_rmt_wnd;
|
int has_rmt_wnd;
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
#include <limits.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
@@ -31,6 +32,15 @@
|
|||||||
typedef struct omni_file_logger {
|
typedef struct omni_file_logger {
|
||||||
FILE *file;
|
FILE *file;
|
||||||
pthread_mutex_t mutex;
|
pthread_mutex_t mutex;
|
||||||
|
char path[PATH_MAX];
|
||||||
|
size_t current_bytes;
|
||||||
|
size_t buffered_bytes;
|
||||||
|
size_t flush_bytes;
|
||||||
|
size_t max_bytes;
|
||||||
|
int flush_interval_ms;
|
||||||
|
int max_files;
|
||||||
|
int immediate_flush;
|
||||||
|
uint64_t last_flush_monotonic_ms;
|
||||||
} omni_file_logger_t;
|
} omni_file_logger_t;
|
||||||
|
|
||||||
int64_t omni_now_unix_nano(void);
|
int64_t omni_now_unix_nano(void);
|
||||||
@@ -61,6 +71,7 @@ double omni_duration_ms_to_ns(double ms);
|
|||||||
const char *omni_path_base_name(const char *path);
|
const char *omni_path_base_name(const char *path);
|
||||||
|
|
||||||
void omni_file_logger_init(omni_file_logger_t *logger, FILE *file);
|
void omni_file_logger_init(omni_file_logger_t *logger, FILE *file);
|
||||||
|
void omni_file_logger_init_path(omni_file_logger_t *logger, FILE *file, const char *path, int immediate_flush);
|
||||||
void omni_file_logger_destroy(omni_file_logger_t *logger);
|
void omni_file_logger_destroy(omni_file_logger_t *logger);
|
||||||
int omni_file_logger_write_line(omni_file_logger_t *logger, const char *line);
|
int omni_file_logger_write_line(omni_file_logger_t *logger, const char *line);
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ typedef struct kcp_client_recv_meta {
|
|||||||
typedef struct kcp_client_state {
|
typedef struct kcp_client_state {
|
||||||
int connected;
|
int connected;
|
||||||
int registered;
|
int registered;
|
||||||
|
uint32_t server_idle_ms;
|
||||||
char last_server_error[256];
|
char last_server_error[256];
|
||||||
} kcp_client_state_t;
|
} kcp_client_state_t;
|
||||||
|
|
||||||
@@ -27,6 +28,7 @@ kcp_client_t *kcp_client_dial(const char *server_addr, const char *dial_addr, co
|
|||||||
const char *kcp_client_id(const kcp_client_t *client);
|
const char *kcp_client_id(const kcp_client_t *client);
|
||||||
int kcp_client_send_text(kcp_client_t *client, const char *to, const char *text);
|
int kcp_client_send_text(kcp_client_t *client, const char *to, const char *text);
|
||||||
int kcp_client_send_binary(kcp_client_t *client, const char *to, const void *data, size_t data_len);
|
int kcp_client_send_binary(kcp_client_t *client, const char *to, const void *data, size_t data_len);
|
||||||
|
int kcp_client_send_binary_with_id(kcp_client_t *client, const char *to, const void *data, size_t data_len, uint64_t *out_id);
|
||||||
int kcp_client_send_file_path(kcp_client_t *client, const char *to, const char *path);
|
int kcp_client_send_file_path(kcp_client_t *client, const char *to, const char *path);
|
||||||
int kcp_client_receive_timed(kcp_client_t *client, message_t *out_msg, int timeout_ms);
|
int kcp_client_receive_timed(kcp_client_t *client, message_t *out_msg, int timeout_ms);
|
||||||
int kcp_client_receive(kcp_client_t *client, message_t *out_msg);
|
int kcp_client_receive(kcp_client_t *client, message_t *out_msg);
|
||||||
|
|||||||
@@ -56,7 +56,9 @@ typedef struct kcp_runtime_stats {
|
|||||||
uint32_t conv;
|
uint32_t conv;
|
||||||
uint32_t rto_ms;
|
uint32_t rto_ms;
|
||||||
int32_t srtt_ms;
|
int32_t srtt_ms;
|
||||||
|
int32_t min_srtt_ms;
|
||||||
int32_t srttvar_ms;
|
int32_t srttvar_ms;
|
||||||
|
uint32_t last_feedback_age_ms;
|
||||||
uint32_t snd_wnd;
|
uint32_t snd_wnd;
|
||||||
uint32_t rmt_wnd;
|
uint32_t rmt_wnd;
|
||||||
uint32_t inflight;
|
uint32_t inflight;
|
||||||
|
|||||||
@@ -5,12 +5,37 @@
|
|||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "gps_buffer.h"
|
||||||
|
#include "omni_common.h"
|
||||||
#include "peer_kcp_client.h"
|
#include "peer_kcp_client.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
typedef struct __attribute__((packed)) video_pipeline_packet_metadata {
|
||||||
|
#else
|
||||||
|
typedef struct video_pipeline_packet_metadata {
|
||||||
|
#endif
|
||||||
|
uint64_t timestamp_ms;
|
||||||
|
double latitude;
|
||||||
|
double longitude;
|
||||||
|
uint32_t capture_to_send_ms;
|
||||||
|
} video_pipeline_packet_metadata_t;
|
||||||
|
|
||||||
|
typedef struct video_stage_logger {
|
||||||
|
omni_file_logger_t file_logger;
|
||||||
|
int enabled;
|
||||||
|
uint64_t sample_mod;
|
||||||
|
} video_stage_logger_t;
|
||||||
|
|
||||||
|
typedef void (*video_pipeline_progress_fn)(void *context);
|
||||||
|
|
||||||
|
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
|
||||||
|
_Static_assert(sizeof(video_pipeline_packet_metadata_t) == 28, "video trailer metadata must be 28 bytes");
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct video_pipeline_config {
|
typedef struct video_pipeline_config {
|
||||||
const char *camera_device;
|
const char *camera_device;
|
||||||
const char *server_addr;
|
const char *server_addr;
|
||||||
@@ -25,6 +50,15 @@ typedef struct video_pipeline_config {
|
|||||||
int output_height;
|
int output_height;
|
||||||
int max_frames;
|
int max_frames;
|
||||||
int enable_timing_logs;
|
int enable_timing_logs;
|
||||||
|
int soft_backpressure_segments;
|
||||||
|
int hard_backpressure_segments;
|
||||||
|
int hard_backpressure_hold_ms;
|
||||||
|
int frame_stall_reconnect_ms;
|
||||||
|
kcp_session_stats_logger_t *stats_logger;
|
||||||
|
video_stage_logger_t *stage_logger;
|
||||||
|
int stats_interval_ms;
|
||||||
|
video_pipeline_progress_fn progress_callback;
|
||||||
|
void *progress_context;
|
||||||
} video_pipeline_config_t;
|
} video_pipeline_config_t;
|
||||||
|
|
||||||
typedef struct video_pipeline_stats {
|
typedef struct video_pipeline_stats {
|
||||||
@@ -32,17 +66,27 @@ typedef struct video_pipeline_stats {
|
|||||||
uint64_t frames_sent;
|
uint64_t frames_sent;
|
||||||
uint64_t bytes_sent;
|
uint64_t bytes_sent;
|
||||||
uint64_t send_errors;
|
uint64_t send_errors;
|
||||||
|
uint64_t backpressure_drops;
|
||||||
|
uint64_t backlog_resets;
|
||||||
uint64_t last_frame_bytes;
|
uint64_t last_frame_bytes;
|
||||||
|
uint32_t last_backlog_segments;
|
||||||
|
uint32_t last_capture_to_send_ms;
|
||||||
|
double avg_capture_to_send_ms;
|
||||||
int connected;
|
int connected;
|
||||||
char last_error[256];
|
char last_error[256];
|
||||||
|
char last_backlog_reason[128];
|
||||||
kcp_runtime_stats_t transport;
|
kcp_runtime_stats_t transport;
|
||||||
} video_pipeline_stats_t;
|
} video_pipeline_stats_t;
|
||||||
|
|
||||||
|
#define VIDEO_PIPELINE_RUN_RETRY_IMMEDIATE 2
|
||||||
|
|
||||||
void video_pipeline_config_init(video_pipeline_config_t *config);
|
void video_pipeline_config_init(video_pipeline_config_t *config);
|
||||||
void video_pipeline_config_load_env(video_pipeline_config_t *config);
|
void video_pipeline_config_load_env(video_pipeline_config_t *config);
|
||||||
int video_pipeline_stats_init(video_pipeline_stats_t *stats);
|
int video_pipeline_stats_init(video_pipeline_stats_t *stats);
|
||||||
void video_pipeline_stats_destroy(video_pipeline_stats_t *stats);
|
void video_pipeline_stats_destroy(video_pipeline_stats_t *stats);
|
||||||
void video_pipeline_stats_snapshot(video_pipeline_stats_t *stats, video_pipeline_stats_t *out_stats);
|
void video_pipeline_stats_snapshot(video_pipeline_stats_t *stats, video_pipeline_stats_t *out_stats);
|
||||||
|
video_stage_logger_t *video_stage_logger_open_jsonl(const char *path, uint64_t sample_mod);
|
||||||
|
void video_stage_logger_close(video_stage_logger_t *logger);
|
||||||
int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_stats_t *stats, volatile sig_atomic_t *stop_requested);
|
int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_stats_t *stats, volatile sig_atomic_t *stop_requested);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@@ -119,7 +119,9 @@ static PyObject *build_kcp_stats_dict(const omnisocket_session_kcp_stats_t *stat
|
|||||||
SET_KCP_STAT("conv", PyLong_FromUnsignedLong(stats->conv));
|
SET_KCP_STAT("conv", PyLong_FromUnsignedLong(stats->conv));
|
||||||
SET_KCP_STAT("rto_ms", PyLong_FromUnsignedLong(stats->rto_ms));
|
SET_KCP_STAT("rto_ms", PyLong_FromUnsignedLong(stats->rto_ms));
|
||||||
SET_KCP_STAT("srtt_ms", PyLong_FromLong(stats->srtt_ms));
|
SET_KCP_STAT("srtt_ms", PyLong_FromLong(stats->srtt_ms));
|
||||||
|
SET_KCP_STAT("min_srtt_ms", PyLong_FromLong(stats->min_srtt_ms));
|
||||||
SET_KCP_STAT("srttvar_ms", PyLong_FromLong(stats->srttvar_ms));
|
SET_KCP_STAT("srttvar_ms", PyLong_FromLong(stats->srttvar_ms));
|
||||||
|
SET_KCP_STAT("last_feedback_age_ms", PyLong_FromUnsignedLong(stats->last_feedback_age_ms));
|
||||||
SET_KCP_STAT("snd_wnd", PyLong_FromUnsignedLong(stats->snd_wnd));
|
SET_KCP_STAT("snd_wnd", PyLong_FromUnsignedLong(stats->snd_wnd));
|
||||||
SET_KCP_STAT("rmt_wnd", PyLong_FromUnsignedLong(stats->rmt_wnd));
|
SET_KCP_STAT("rmt_wnd", PyLong_FromUnsignedLong(stats->rmt_wnd));
|
||||||
SET_KCP_STAT("inflight", PyLong_FromUnsignedLong(stats->inflight));
|
SET_KCP_STAT("inflight", PyLong_FromUnsignedLong(stats->inflight));
|
||||||
@@ -279,6 +281,29 @@ static PyObject *PyOmniSession_send(PyOmniSession *self, PyObject *args, PyObjec
|
|||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PyObject *PyOmniSession_send_with_id(PyOmniSession *self, PyObject *args, PyObject *kwargs) {
|
||||||
|
const char *to;
|
||||||
|
Py_buffer payload;
|
||||||
|
int rc;
|
||||||
|
uint64_t message_id = 0;
|
||||||
|
static char *kwlist[] = {"to", "data", NULL};
|
||||||
|
|
||||||
|
memset(&payload, 0, sizeof(payload));
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sy*", kwlist, &to, &payload)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_BEGIN_ALLOW_THREADS
|
||||||
|
rc = omnisocket_session_send_with_id(&self->session, to, payload.buf, (size_t) payload.len, &message_id);
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
|
|
||||||
|
PyBuffer_Release(&payload);
|
||||||
|
if (rc != 0) {
|
||||||
|
return PyErr_SetFromErrno(PyExc_OSError);
|
||||||
|
}
|
||||||
|
return PyLong_FromUnsignedLongLong((unsigned long long) message_id);
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject *PyOmniSession_recv(PyOmniSession *self, PyObject *args, PyObject *kwargs) {
|
static PyObject *PyOmniSession_recv(PyOmniSession *self, PyObject *args, PyObject *kwargs) {
|
||||||
int timeout_ms = -1;
|
int timeout_ms = -1;
|
||||||
int rc;
|
int rc;
|
||||||
@@ -379,6 +404,7 @@ static PyMethodDef PyOmniSession_methods[] = {
|
|||||||
{"connect", (PyCFunction) PyOmniSession_connect, METH_VARARGS | METH_KEYWORDS, NULL},
|
{"connect", (PyCFunction) PyOmniSession_connect, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||||
{"close", (PyCFunction) PyOmniSession_close, METH_NOARGS, NULL},
|
{"close", (PyCFunction) PyOmniSession_close, METH_NOARGS, NULL},
|
||||||
{"send", (PyCFunction) PyOmniSession_send, METH_VARARGS | METH_KEYWORDS, NULL},
|
{"send", (PyCFunction) PyOmniSession_send, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||||
|
{"send_with_id", (PyCFunction) PyOmniSession_send_with_id, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||||
{"recv", (PyCFunction) PyOmniSession_recv, METH_VARARGS | METH_KEYWORDS, PyOmniSession_recv_doc},
|
{"recv", (PyCFunction) PyOmniSession_recv, METH_VARARGS | METH_KEYWORDS, PyOmniSession_recv_doc},
|
||||||
{"recv_into", (PyCFunction) PyOmniSession_recv_into, METH_VARARGS | METH_KEYWORDS, PyOmniSession_recv_into_doc},
|
{"recv_into", (PyCFunction) PyOmniSession_recv_into, METH_VARARGS | METH_KEYWORDS, PyOmniSession_recv_into_doc},
|
||||||
{"stats", (PyCFunction) PyOmniSession_stats, METH_NOARGS, NULL},
|
{"stats", (PyCFunction) PyOmniSession_stats, METH_NOARGS, NULL},
|
||||||
|
|||||||
@@ -167,6 +167,16 @@ int omnisocket_session_close(omnisocket_session_t *session) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int omnisocket_session_send(omnisocket_session_t *session, const char *to, const void *data, size_t data_len) {
|
int omnisocket_session_send(omnisocket_session_t *session, const char *to, const void *data, size_t data_len) {
|
||||||
|
return omnisocket_session_send_with_id(session, to, data, data_len, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
int omnisocket_session_send_with_id(
|
||||||
|
omnisocket_session_t *session,
|
||||||
|
const char *to,
|
||||||
|
const void *data,
|
||||||
|
size_t data_len,
|
||||||
|
uint64_t *out_message_id
|
||||||
|
) {
|
||||||
kcp_client_t *client;
|
kcp_client_t *client;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
@@ -178,7 +188,7 @@ int omnisocket_session_send(omnisocket_session_t *session, const char *to, const
|
|||||||
if (omnisocket_session_begin_client_op(session, &client) != 0) {
|
if (omnisocket_session_begin_client_op(session, &client) != 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
rc = kcp_client_send_binary(client, to, data, data_len);
|
rc = kcp_client_send_binary_with_id(client, to, data, data_len, out_message_id);
|
||||||
pthread_mutex_lock(&session->mutex);
|
pthread_mutex_lock(&session->mutex);
|
||||||
if (rc == 0) {
|
if (rc == 0) {
|
||||||
session->stats.send_calls += 1;
|
session->stats.send_calls += 1;
|
||||||
@@ -297,7 +307,9 @@ void omnisocket_session_kcp_stats_snapshot(omnisocket_session_t *session, omniso
|
|||||||
out_stats->conv = runtime_stats.conv;
|
out_stats->conv = runtime_stats.conv;
|
||||||
out_stats->rto_ms = runtime_stats.rto_ms;
|
out_stats->rto_ms = runtime_stats.rto_ms;
|
||||||
out_stats->srtt_ms = runtime_stats.srtt_ms;
|
out_stats->srtt_ms = runtime_stats.srtt_ms;
|
||||||
|
out_stats->min_srtt_ms = runtime_stats.min_srtt_ms;
|
||||||
out_stats->srttvar_ms = runtime_stats.srttvar_ms;
|
out_stats->srttvar_ms = runtime_stats.srttvar_ms;
|
||||||
|
out_stats->last_feedback_age_ms = runtime_stats.last_feedback_age_ms;
|
||||||
out_stats->snd_wnd = runtime_stats.snd_wnd;
|
out_stats->snd_wnd = runtime_stats.snd_wnd;
|
||||||
out_stats->rmt_wnd = runtime_stats.rmt_wnd;
|
out_stats->rmt_wnd = runtime_stats.rmt_wnd;
|
||||||
out_stats->inflight = runtime_stats.inflight;
|
out_stats->inflight = runtime_stats.inflight;
|
||||||
|
|||||||
@@ -22,7 +22,9 @@ typedef struct omnisocket_session_kcp_stats {
|
|||||||
uint32_t conv;
|
uint32_t conv;
|
||||||
uint32_t rto_ms;
|
uint32_t rto_ms;
|
||||||
int32_t srtt_ms;
|
int32_t srtt_ms;
|
||||||
|
int32_t min_srtt_ms;
|
||||||
int32_t srttvar_ms;
|
int32_t srttvar_ms;
|
||||||
|
uint32_t last_feedback_age_ms;
|
||||||
uint32_t snd_wnd;
|
uint32_t snd_wnd;
|
||||||
uint32_t rmt_wnd;
|
uint32_t rmt_wnd;
|
||||||
uint32_t inflight;
|
uint32_t inflight;
|
||||||
@@ -72,6 +74,13 @@ int omnisocket_session_connect(
|
|||||||
);
|
);
|
||||||
int omnisocket_session_close(omnisocket_session_t *session);
|
int omnisocket_session_close(omnisocket_session_t *session);
|
||||||
int omnisocket_session_send(omnisocket_session_t *session, const char *to, const void *data, size_t data_len);
|
int omnisocket_session_send(omnisocket_session_t *session, const char *to, const void *data, size_t data_len);
|
||||||
|
int omnisocket_session_send_with_id(
|
||||||
|
omnisocket_session_t *session,
|
||||||
|
const char *to,
|
||||||
|
const void *data,
|
||||||
|
size_t data_len,
|
||||||
|
uint64_t *out_message_id
|
||||||
|
);
|
||||||
int omnisocket_session_recv(omnisocket_session_t *session, message_t *out_msg, int timeout_ms);
|
int omnisocket_session_recv(omnisocket_session_t *session, message_t *out_msg, int timeout_ms);
|
||||||
int omnisocket_session_recv_into(
|
int omnisocket_session_recv_into(
|
||||||
omnisocket_session_t *session,
|
omnisocket_session_t *session,
|
||||||
|
|||||||
@@ -56,7 +56,31 @@ def _run_server(binary_name: str, listen_addr: str):
|
|||||||
process.wait(timeout=2.0)
|
process.wait(timeout=2.0)
|
||||||
|
|
||||||
|
|
||||||
def _connect_with_retry(session_cls, *, transport: str, server_addr: str, peer_id: str):
|
@contextmanager
|
||||||
|
def _run_relay(listen_addr: str, remote_addr: str):
|
||||||
|
binary = ROOT / 'bin' / 'kcpserver'
|
||||||
|
if not binary.exists():
|
||||||
|
pytest.skip(f'{binary} is not built')
|
||||||
|
|
||||||
|
process = subprocess.Popen(
|
||||||
|
[str(binary), '-mode', 'relay', '-listen', listen_addr, '-relay-remote', remote_addr],
|
||||||
|
cwd=str(ROOT),
|
||||||
|
stdout=subprocess.DEVNULL,
|
||||||
|
stderr=subprocess.DEVNULL,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
time.sleep(0.2)
|
||||||
|
yield process
|
||||||
|
finally:
|
||||||
|
process.terminate()
|
||||||
|
try:
|
||||||
|
process.wait(timeout=2.0)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
process.kill()
|
||||||
|
process.wait(timeout=2.0)
|
||||||
|
|
||||||
|
|
||||||
|
def _connect_with_retry(session_cls, *, transport: str, server_addr: str, peer_id: str, relay_via: str = ''):
|
||||||
deadline = time.monotonic() + 3.0
|
deadline = time.monotonic() + 3.0
|
||||||
last_error: Exception | None = None
|
last_error: Exception | None = None
|
||||||
|
|
||||||
@@ -69,6 +93,8 @@ def _connect_with_retry(session_cls, *, transport: str, server_addr: str, peer_i
|
|||||||
}
|
}
|
||||||
if transport == 'kcp':
|
if transport == 'kcp':
|
||||||
kwargs.update(CONTROL_DEFAULTS)
|
kwargs.update(CONTROL_DEFAULTS)
|
||||||
|
if relay_via:
|
||||||
|
kwargs['relay_via'] = relay_via
|
||||||
else:
|
else:
|
||||||
kwargs['enable_timestamping'] = False
|
kwargs['enable_timestamping'] = False
|
||||||
session.connect(**kwargs)
|
session.connect(**kwargs)
|
||||||
@@ -174,7 +200,7 @@ def test_kcp_idle_video_peers_survive_without_receive_loop() -> None:
|
|||||||
port = _reserve_port()
|
port = _reserve_port()
|
||||||
listen_addr = f'127.0.0.1:{port}'
|
listen_addr = f'127.0.0.1:{port}'
|
||||||
sender_id = 'peer-b-video'
|
sender_id = 'peer-b-video'
|
||||||
receiver_id = 'peer-a-video'
|
receiver_id = 'pytest-kcp-video-idle-receiver'
|
||||||
|
|
||||||
with _run_server('kcpserver', listen_addr):
|
with _run_server('kcpserver', listen_addr):
|
||||||
sender = _connect_with_retry(Session, transport='kcp', server_addr=listen_addr, peer_id=sender_id)
|
sender = _connect_with_retry(Session, transport='kcp', server_addr=listen_addr, peer_id=sender_id)
|
||||||
@@ -194,6 +220,52 @@ def test_kcp_idle_video_peers_survive_without_receive_loop() -> None:
|
|||||||
receiver.close()
|
receiver.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_kcp_peer_a_video_stale_receiver_is_evicted() -> None:
|
||||||
|
port = _reserve_port()
|
||||||
|
listen_addr = f'127.0.0.1:{port}'
|
||||||
|
receiver_id = 'peer-a-video'
|
||||||
|
|
||||||
|
with _run_server('kcpserver', listen_addr):
|
||||||
|
receiver = _connect_with_retry(Session, transport='kcp', server_addr=listen_addr, peer_id=receiver_id)
|
||||||
|
|
||||||
|
try:
|
||||||
|
time.sleep(5.0)
|
||||||
|
with pytest.raises(OSError):
|
||||||
|
receiver.recv(timeout_ms=1000)
|
||||||
|
finally:
|
||||||
|
receiver.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_kcp_relay_routes_multiple_sessions_by_conv() -> None:
|
||||||
|
hub_port = _reserve_port()
|
||||||
|
relay_port = _reserve_port()
|
||||||
|
hub_addr = f'127.0.0.1:{hub_port}'
|
||||||
|
relay_addr = f'127.0.0.1:{relay_port}'
|
||||||
|
|
||||||
|
with _run_server('kcpserver', hub_addr):
|
||||||
|
with _run_relay(relay_addr, hub_addr):
|
||||||
|
sender = _connect_with_retry(Session, transport='kcp', server_addr=hub_addr, peer_id='pytest-relay-sender', relay_via=relay_addr)
|
||||||
|
receiver = _connect_with_retry(Session, transport='kcp', server_addr=hub_addr, peer_id='pytest-relay-receiver', relay_via=relay_addr)
|
||||||
|
chatter = _connect_with_retry(Session, transport='kcp', server_addr=hub_addr, peer_id='pytest-relay-chatter', relay_via=relay_addr)
|
||||||
|
|
||||||
|
try:
|
||||||
|
chatter.send(to='pytest-relay-sender', data=b'chatter-primes-last-client')
|
||||||
|
from_peer, msg_type, recv_payload = sender.recv(timeout_ms=1000)
|
||||||
|
assert from_peer == 'pytest-relay-chatter'
|
||||||
|
assert msg_type == MSG_TYPE_BINARY
|
||||||
|
assert recv_payload == b'chatter-primes-last-client'
|
||||||
|
|
||||||
|
sender.send(to='pytest-relay-receiver', data=b'relay-video-frame')
|
||||||
|
from_peer, msg_type, recv_payload = receiver.recv(timeout_ms=1000)
|
||||||
|
assert from_peer == 'pytest-relay-sender'
|
||||||
|
assert msg_type == MSG_TYPE_BINARY
|
||||||
|
assert recv_payload == b'relay-video-frame'
|
||||||
|
finally:
|
||||||
|
sender.close()
|
||||||
|
receiver.close()
|
||||||
|
chatter.close()
|
||||||
|
|
||||||
|
|
||||||
def test_udp_session_close_interrupts_blocking_recv() -> None:
|
def test_udp_session_close_interrupts_blocking_recv() -> None:
|
||||||
port = _reserve_port()
|
port = _reserve_port()
|
||||||
listen_addr = f'127.0.0.1:{port}'
|
listen_addr = f'127.0.0.1:{port}'
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ setup(
|
|||||||
maintainer_email='codex@example.com',
|
maintainer_email='codex@example.com',
|
||||||
description='ROS 2 OmniSocket UDP/KCP bridge for teleop TwistStamped commands.',
|
description='ROS 2 OmniSocket UDP/KCP bridge for teleop TwistStamped commands.',
|
||||||
license='MIT',
|
license='MIT',
|
||||||
tests_require=['pytest'],
|
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
'cmd_vel_udp_sender = udp_teleop_bridge.cmd_vel_udp_sender:main',
|
'cmd_vel_udp_sender = udp_teleop_bridge.cmd_vel_udp_sender:main',
|
||||||
|
|||||||
@@ -72,6 +72,12 @@ class OmniTransport:
|
|||||||
def send(self, *, to: str, data: bytes) -> None:
|
def send(self, *, to: str, data: bytes) -> None:
|
||||||
self._session.send(to=to, data=data)
|
self._session.send(to=to, data=data)
|
||||||
|
|
||||||
|
def send_with_id(self, *, to: str, data: bytes) -> int:
|
||||||
|
if not hasattr(self._session, 'send_with_id'):
|
||||||
|
self._session.send(to=to, data=data)
|
||||||
|
raise RuntimeError('send_with_id is not available on this omnisocket build')
|
||||||
|
return int(self._session.send_with_id(to=to, data=data))
|
||||||
|
|
||||||
def recv(self, *, timeout_ms: int = -1):
|
def recv(self, *, timeout_ms: int = -1):
|
||||||
return self._session.recv(timeout_ms=timeout_ms)
|
return self._session.recv(timeout_ms=timeout_ms)
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
import threading
|
import threading
|
||||||
@@ -90,8 +91,14 @@ class UdpCmdVelReceiver(Node):
|
|||||||
self._last_published_command: CommandTuple = ZERO_COMMAND
|
self._last_published_command: CommandTuple = ZERO_COMMAND
|
||||||
self._closing = threading.Event()
|
self._closing = threading.Event()
|
||||||
self._recv_buffer = bytearray(DEFAULT_RECV_BUFFER_BYTES)
|
self._recv_buffer = bytearray(DEFAULT_RECV_BUFFER_BYTES)
|
||||||
|
self._runtime_dir = os.getenv('BLITZ_RUNTIME_DIR', '/run/blitz-robot').strip() or '/run/blitz-robot'
|
||||||
|
self._status_path = os.path.join(self._runtime_dir, 'ros-receiver.status.json')
|
||||||
|
self._transport_reconnect_count = 0
|
||||||
|
self._recv_thread_heartbeat_epoch_ms = self._now_epoch_ms()
|
||||||
|
self._runtime_last_error = ''
|
||||||
|
|
||||||
self.create_timer(1.0 / self._publish_rate_hz, self._publish_tick)
|
self.create_timer(1.0 / self._publish_rate_hz, self._publish_tick)
|
||||||
|
self.create_timer(1.0, self._write_status_tick)
|
||||||
|
|
||||||
recv_target = self._recv_loop_unix_dgram if self._transport_name == 'unix_dgram' else self._recv_loop
|
recv_target = self._recv_loop_unix_dgram if self._transport_name == 'unix_dgram' else self._recv_loop
|
||||||
self._recv_thread = threading.Thread(target=recv_target, daemon=True)
|
self._recv_thread = threading.Thread(target=recv_target, daemon=True)
|
||||||
@@ -174,6 +181,8 @@ class UdpCmdVelReceiver(Node):
|
|||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
self._transport = self._create_transport()
|
self._transport = self._create_transport()
|
||||||
|
self._transport_reconnect_count += 1
|
||||||
|
self._set_runtime_last_error('')
|
||||||
if self._should_log('transport_reconnected', 1.0):
|
if self._should_log('transport_reconnected', 1.0):
|
||||||
self.get_logger().info(
|
self.get_logger().info(
|
||||||
'Reconnected OmniSocket transport %s://%s as %s'
|
'Reconnected OmniSocket transport %s://%s as %s'
|
||||||
@@ -182,6 +191,7 @@ class UdpCmdVelReceiver(Node):
|
|||||||
return True
|
return True
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
self._transport = None
|
self._transport = None
|
||||||
|
self._set_runtime_last_error(str(exc))
|
||||||
if self._should_log('transport_reconnect_error', 2.0):
|
if self._should_log('transport_reconnect_error', 2.0):
|
||||||
self.get_logger().error(f'Failed to reconnect OmniSocket transport: {exc}')
|
self.get_logger().error(f'Failed to reconnect OmniSocket transport: {exc}')
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
@@ -192,10 +202,13 @@ class UdpCmdVelReceiver(Node):
|
|||||||
self._close_unix_socket()
|
self._close_unix_socket()
|
||||||
try:
|
try:
|
||||||
self._setup_unix_socket()
|
self._setup_unix_socket()
|
||||||
|
self._transport_reconnect_count += 1
|
||||||
|
self._set_runtime_last_error('')
|
||||||
if self._should_log('unix_rebound', 1.0):
|
if self._should_log('unix_rebound', 1.0):
|
||||||
self.get_logger().info(f'Rebound unix datagram socket at {self._local_socket_path}')
|
self.get_logger().info(f'Rebound unix datagram socket at {self._local_socket_path}')
|
||||||
return True
|
return True
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
|
self._set_runtime_last_error(str(exc))
|
||||||
if self._should_log('unix_rebind_error', 2.0):
|
if self._should_log('unix_rebind_error', 2.0):
|
||||||
self.get_logger().error(f'Failed to rebind unix datagram socket: {exc}')
|
self.get_logger().error(f'Failed to rebind unix datagram socket: {exc}')
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
@@ -209,6 +222,61 @@ class UdpCmdVelReceiver(Node):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _now_epoch_ms(self) -> int:
|
||||||
|
return time.time_ns() // 1_000_000
|
||||||
|
|
||||||
|
def _update_recv_heartbeat(self) -> None:
|
||||||
|
with self._lock:
|
||||||
|
self._recv_thread_heartbeat_epoch_ms = self._now_epoch_ms()
|
||||||
|
|
||||||
|
def _last_packet_age_ms(self) -> int | None:
|
||||||
|
with self._lock:
|
||||||
|
last_packet_monotonic = self._last_packet_monotonic
|
||||||
|
if last_packet_monotonic is None:
|
||||||
|
return None
|
||||||
|
return max(0, int((time.monotonic() - last_packet_monotonic) * 1000.0))
|
||||||
|
|
||||||
|
def _socket_bound(self) -> bool:
|
||||||
|
if self._transport_name == 'unix_dgram':
|
||||||
|
return self._unix_socket is not None and os.path.exists(self._local_socket_path)
|
||||||
|
return self._transport is not None
|
||||||
|
|
||||||
|
def _set_runtime_last_error(self, message: str) -> None:
    """Record ``message`` as the most recent runtime error text.

    Callers in this class pass an empty string to clear the stored error.
    This method performs a plain attribute assignment and does not acquire
    ``self._lock`` itself.
    """
    self._runtime_last_error = message
|
||||||
|
|
||||||
|
def _status_payload(self) -> dict[str, object]:
|
||||||
|
with self._lock:
|
||||||
|
recv_thread_heartbeat_epoch_ms = self._recv_thread_heartbeat_epoch_ms
|
||||||
|
return {
|
||||||
|
'updated_at_epoch_ms': self._now_epoch_ms(),
|
||||||
|
'pid': os.getpid(),
|
||||||
|
'recv_thread_heartbeat_epoch_ms': recv_thread_heartbeat_epoch_ms,
|
||||||
|
'transport': self._transport_name,
|
||||||
|
'local_socket_path': self._local_socket_path,
|
||||||
|
'socket_bound': self._socket_bound(),
|
||||||
|
'transport_reconnect_count': self._transport_reconnect_count,
|
||||||
|
'last_packet_age_ms': self._last_packet_age_ms(),
|
||||||
|
'last_error': self._runtime_last_error,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _write_status_tick(self) -> None:
|
||||||
|
payload = self._status_payload()
|
||||||
|
if self._transport_name == 'unix_dgram':
|
||||||
|
if self._unix_socket is None:
|
||||||
|
payload['last_error'] = self._runtime_last_error or 'unix datagram socket is not bound'
|
||||||
|
else:
|
||||||
|
if self._transport is None:
|
||||||
|
payload['last_error'] = self._runtime_last_error or 'OmniSocket transport is not connected'
|
||||||
|
try:
|
||||||
|
os.makedirs(self._runtime_dir, exist_ok=True)
|
||||||
|
temp_path = f'{self._status_path}.tmp.{os.getpid()}'
|
||||||
|
with open(temp_path, 'w', encoding='utf-8') as handle:
|
||||||
|
json.dump(payload, handle, ensure_ascii=True, separators=(',', ':'))
|
||||||
|
os.replace(temp_path, self._status_path)
|
||||||
|
except OSError as exc:
|
||||||
|
if self._should_log('status_write_error', 5.0):
|
||||||
|
self.get_logger().warning(f'Failed to write receiver status file: {exc}')
|
||||||
|
|
||||||
def _publish_command(self, command: CommandTuple) -> None:
|
def _publish_command(self, command: CommandTuple) -> None:
|
||||||
msg = TwistStamped()
|
msg = TwistStamped()
|
||||||
msg.header.stamp = self.get_clock().now().to_msg()
|
msg.header.stamp = self.get_clock().now().to_msg()
|
||||||
@@ -229,32 +297,39 @@ class UdpCmdVelReceiver(Node):
|
|||||||
|
|
||||||
def _recv_loop(self) -> None:
|
def _recv_loop(self) -> None:
|
||||||
while not self._closing.is_set() and rclpy.ok():
|
while not self._closing.is_set() and rclpy.ok():
|
||||||
|
self._update_recv_heartbeat()
|
||||||
try:
|
try:
|
||||||
assert self._transport is not None
|
assert self._transport is not None
|
||||||
meta = self._transport.recv_into(buffer=self._recv_buffer, timeout_ms=100)
|
meta = self._transport.recv_into(buffer=self._recv_buffer, timeout_ms=100)
|
||||||
except BufferError as exc:
|
except BufferError as exc:
|
||||||
|
self._set_runtime_last_error(str(exc))
|
||||||
if self._should_log('buffer_error', 2.0):
|
if self._should_log('buffer_error', 2.0):
|
||||||
self.get_logger().warning(f'Dropped oversized OmniSocket frame: {exc}')
|
self.get_logger().warning(f'Dropped oversized OmniSocket frame: {exc}')
|
||||||
continue
|
continue
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
|
self._set_runtime_last_error(str(exc))
|
||||||
if not self._closing.is_set() and self._should_log('recv_error', 2.0):
|
if not self._closing.is_set() and self._should_log('recv_error', 2.0):
|
||||||
self.get_logger().error(f'OmniSocket receive loop stopped: {exc}')
|
self.get_logger().error(f'OmniSocket receive loop stopped: {exc}')
|
||||||
if not self._reconnect_transport():
|
if not self._reconnect_transport():
|
||||||
return
|
return
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
self._update_recv_heartbeat()
|
||||||
if meta is None:
|
if meta is None:
|
||||||
continue
|
continue
|
||||||
|
self._set_runtime_last_error('')
|
||||||
|
|
||||||
from_peer = str(meta['from'])
|
from_peer = str(meta['from'])
|
||||||
msg_type = int(meta['msg_type'])
|
msg_type = int(meta['msg_type'])
|
||||||
body_len = int(meta['body_len'])
|
body_len = int(meta['body_len'])
|
||||||
|
|
||||||
if msg_type == self._msg_type_error:
|
if msg_type == self._msg_type_error:
|
||||||
|
self._set_runtime_last_error(f'server error message from {from_peer}')
|
||||||
self._handle_error_message(from_peer, body_len)
|
self._handle_error_message(from_peer, body_len)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if self._expected_sender and from_peer != self._expected_sender:
|
if self._expected_sender and from_peer != self._expected_sender:
|
||||||
|
self._set_runtime_last_error(f'unexpected sender {from_peer}')
|
||||||
if self._should_log('unexpected_sender', 2.0):
|
if self._should_log('unexpected_sender', 2.0):
|
||||||
self.get_logger().warning(
|
self.get_logger().warning(
|
||||||
'Ignoring message from unexpected sender %s (expected %s)'
|
'Ignoring message from unexpected sender %s (expected %s)'
|
||||||
@@ -263,6 +338,7 @@ class UdpCmdVelReceiver(Node):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if msg_type != self._msg_type_binary:
|
if msg_type != self._msg_type_binary:
|
||||||
|
self._set_runtime_last_error(f'unexpected message type {msg_type}')
|
||||||
if self._should_log('unexpected_type', 2.0):
|
if self._should_log('unexpected_type', 2.0):
|
||||||
self.get_logger().warning(
|
self.get_logger().warning(
|
||||||
'Ignoring unexpected message type %d from %s (%d bytes)'
|
'Ignoring unexpected message type %d from %s (%d bytes)'
|
||||||
@@ -271,6 +347,7 @@ class UdpCmdVelReceiver(Node):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if body_len != PACKET_SIZE:
|
if body_len != PACKET_SIZE:
|
||||||
|
self._set_runtime_last_error(f'invalid payload size {body_len}')
|
||||||
if self._should_log('packet_size', 2.0):
|
if self._should_log('packet_size', 2.0):
|
||||||
self.get_logger().warning(
|
self.get_logger().warning(
|
||||||
'Dropped binary payload from %s with invalid size %d (expected %d)'
|
'Dropped binary payload from %s with invalid size %d (expected %d)'
|
||||||
@@ -281,6 +358,7 @@ class UdpCmdVelReceiver(Node):
|
|||||||
try:
|
try:
|
||||||
command = unpack_command(self._recv_buffer[:PACKET_SIZE])
|
command = unpack_command(self._recv_buffer[:PACKET_SIZE])
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
|
self._set_runtime_last_error(str(exc))
|
||||||
if self._should_log('decode_error', 2.0):
|
if self._should_log('decode_error', 2.0):
|
||||||
self.get_logger().warning(f'Dropped malformed command payload: {exc}')
|
self.get_logger().warning(f'Dropped malformed command payload: {exc}')
|
||||||
continue
|
continue
|
||||||
@@ -288,15 +366,18 @@ class UdpCmdVelReceiver(Node):
|
|||||||
with self._lock:
|
with self._lock:
|
||||||
self._latest_command = command
|
self._latest_command = command
|
||||||
self._last_packet_monotonic = time.monotonic()
|
self._last_packet_monotonic = time.monotonic()
|
||||||
|
self._set_runtime_last_error('')
|
||||||
|
|
||||||
def _recv_loop_unix_dgram(self) -> None:
|
def _recv_loop_unix_dgram(self) -> None:
|
||||||
assert self._unix_socket is not None
|
assert self._unix_socket is not None
|
||||||
|
|
||||||
while not self._closing.is_set() and rclpy.ok():
|
while not self._closing.is_set() and rclpy.ok():
|
||||||
|
self._update_recv_heartbeat()
|
||||||
try:
|
try:
|
||||||
payload = self._unix_socket.recv(DEFAULT_RECV_BUFFER_BYTES)
|
payload = self._unix_socket.recv(DEFAULT_RECV_BUFFER_BYTES)
|
||||||
except socket.timeout:
|
except socket.timeout:
|
||||||
if not os.path.exists(self._local_socket_path):
|
if not os.path.exists(self._local_socket_path):
|
||||||
|
self._set_runtime_last_error('unix datagram socket path disappeared')
|
||||||
if self._should_log('unix_socket_missing', 2.0):
|
if self._should_log('unix_socket_missing', 2.0):
|
||||||
self.get_logger().warning(
|
self.get_logger().warning(
|
||||||
f'Unix datagram socket path disappeared, rebinding {self._local_socket_path}'
|
f'Unix datagram socket path disappeared, rebinding {self._local_socket_path}'
|
||||||
@@ -305,13 +386,16 @@ class UdpCmdVelReceiver(Node):
|
|||||||
return
|
return
|
||||||
continue
|
continue
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
|
self._set_runtime_last_error(str(exc))
|
||||||
if not self._closing.is_set() and self._should_log('unix_recv_error', 2.0):
|
if not self._closing.is_set() and self._should_log('unix_recv_error', 2.0):
|
||||||
self.get_logger().error(f'Unix datagram receive loop stopped: {exc}')
|
self.get_logger().error(f'Unix datagram receive loop stopped: {exc}')
|
||||||
if not self._rebind_unix_socket():
|
if not self._rebind_unix_socket():
|
||||||
return
|
return
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
self._update_recv_heartbeat()
|
||||||
if len(payload) != PACKET_SIZE:
|
if len(payload) != PACKET_SIZE:
|
||||||
|
self._set_runtime_last_error(f'invalid unix datagram payload size {len(payload)}')
|
||||||
if self._should_log('unix_packet_size', 2.0):
|
if self._should_log('unix_packet_size', 2.0):
|
||||||
self.get_logger().warning(
|
self.get_logger().warning(
|
||||||
'Dropped unix datagram payload with invalid size %d (expected %d)'
|
'Dropped unix datagram payload with invalid size %d (expected %d)'
|
||||||
@@ -322,6 +406,7 @@ class UdpCmdVelReceiver(Node):
|
|||||||
try:
|
try:
|
||||||
command = unpack_command(payload)
|
command = unpack_command(payload)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
|
self._set_runtime_last_error(str(exc))
|
||||||
if self._should_log('unix_decode_error', 2.0):
|
if self._should_log('unix_decode_error', 2.0):
|
||||||
self.get_logger().warning(f'Dropped malformed unix datagram payload: {exc}')
|
self.get_logger().warning(f'Dropped malformed unix datagram payload: {exc}')
|
||||||
continue
|
continue
|
||||||
@@ -329,6 +414,7 @@ class UdpCmdVelReceiver(Node):
|
|||||||
with self._lock:
|
with self._lock:
|
||||||
self._latest_command = command
|
self._latest_command = command
|
||||||
self._last_packet_monotonic = time.monotonic()
|
self._last_packet_monotonic = time.monotonic()
|
||||||
|
self._set_runtime_last_error('')
|
||||||
|
|
||||||
def _command_for_publish_tick(self) -> tuple[CommandTuple, Optional[float], bool]:
|
def _command_for_publish_tick(self) -> tuple[CommandTuple, Optional[float], bool]:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
|
|||||||
180
scripts/boot/5g-dial.sh
Normal file
180
scripts/boot/5g-dial.sh
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="5g-dial"
|
||||||
|
|
||||||
|
# Append one "--route-target <value>" pair to the global dial_cmd array for
# every entry of a comma- and/or whitespace-separated list.
#   $1  raw target list; an empty string is a no-op
append_route_targets() {
    local raw_list="$1"
    local target

    [[ -n "${raw_list}" ]] || return 0

    for target in ${raw_list//,/ }; do
        if [[ -n "${target}" ]]; then
            dial_cmd+=(--route-target "${target}")
        fi
    done
}
|
||||||
|
|
||||||
|
# Print the "interface" value stored in a JSON info file (stripped of
# surrounding whitespace; empty when the key is missing or null).
#   $1  path to the JSON file
# Returns 1 without output when the file does not exist.
read_detected_interface() {
    local info_json="$1"

    [[ -f "${info_json}" ]] || return 1

    python3 -c 'import json, sys; print((json.load(open(sys.argv[1], encoding="utf-8")).get("interface") or "").strip())' "${info_json}"
}
|
||||||
|
|
||||||
|
# Bring down every interface named in a comma- and/or whitespace-separated list.
#   $1  raw interface list; an empty string is a no-op
# For each interface: ask NetworkManager to disconnect it when nmcli is
# available (errors ignored), then set the link down if the device exists.
# Interfaces that are not present are logged and skipped.
# Returns the exit status of the failing "ip link set" on the first failure,
# otherwise 0.
disable_interfaces() {
    local raw_list="$1"
    local iface
    local nmcli_available=0
    # Keep the failure status local so it cannot clobber a caller's "rc".
    local rc

    if [[ -z "${raw_list}" ]]; then
        return 0
    fi
    if command -v nmcli >/dev/null 2>&1; then
        nmcli_available=1
    fi

    for iface in ${raw_list//,/ }; do
        if [[ -z "${iface}" ]]; then
            continue
        fi
        blitz_log "${STEP}" "disable-interface" "start" "iface=${iface}" 0
        if [[ "${nmcli_available}" -eq 1 ]]; then
            nmcli device disconnect "${iface}" >/dev/null 2>&1 || true
        fi
        if ip link show dev "${iface}" >/dev/null 2>&1; then
            if ip link set dev "${iface}" down; then
                blitz_log "${STEP}" "disable-interface" "success" "iface=${iface}" 0
            else
                # In the else branch $? still holds the status of the failed condition command.
                rc=$?
                blitz_log "${STEP}" "disable-interface" "failure" "iface=${iface}" "${rc}"
                return "${rc}"
            fi
        else
            blitz_log "${STEP}" "disable-interface" "success" "iface=${iface} not present, skipping" 0
        fi
    done
}
|
||||||
|
|
||||||
|
# Wait until a serial device node exists.
#   $1  path of the serial port
#   $2  maximum time to wait, in seconds (integer)
# The path is probed once per second and always at least once, so a timeout of
# 0 still succeeds when the device is already present, and a device that shows
# up during the final sleep is still detected. A "waiting" line is logged every
# five seconds. Returns 0 when the path exists, 1 on timeout.
wait_for_serial() {
    local serial_port="$1"
    local timeout_sec="$2"
    local waited=0

    while :; do
        if [[ -e "${serial_port}" ]]; then
            blitz_log "${STEP}" "wait-serial" "success" "serial_port=${serial_port} waited_sec=${waited}" 0
            return 0
        fi
        if (( waited >= timeout_sec )); then
            break
        fi
        if (( waited % 5 == 0 )); then
            blitz_log "${STEP}" "wait-serial" "waiting" "serial_port=${serial_port} waited_sec=${waited}" 0
        fi
        sleep 1
        waited=$(( waited + 1 ))
    done

    blitz_log "${STEP}" "wait-serial" "failure" "serial_port=${serial_port} timeout_sec=${timeout_sec}" 1
    return 1
}
|
||||||
|
|
||||||
|
# Wait until blitz_route_ready reports a route to the target.
#   $1  target IP address
#   $2  maximum time to wait, in seconds (integer)
#   $3  optional expected interface, passed through to blitz_route_ready
# The route is probed once per second and always at least once, so a timeout of
# 0 still succeeds when the route is already in place, and a route that appears
# during the final sleep is still detected. A "waiting" line is logged every
# five seconds. Returns 0 when blitz_route_ready prints a non-empty result,
# 1 on timeout.
wait_for_route() {
    local target_ip="$1"
    local timeout_sec="$2"
    local expected_interface="${3:-}"
    local waited=0
    local route_output

    while :; do
        # A failing probe must not abort the script under "set -e".
        route_output="$(blitz_route_ready "${target_ip}" "${expected_interface}" || true)"
        if [[ -n "${route_output}" ]]; then
            blitz_log "${STEP}" "route-check" "success" "target_ip=${target_ip} interface=${expected_interface:-auto} route=${route_output}" 0
            return 0
        fi
        if (( waited >= timeout_sec )); then
            break
        fi
        if (( waited % 5 == 0 )); then
            blitz_log "${STEP}" "route-check" "waiting" "target_ip=${target_ip} interface=${expected_interface:-auto} waited_sec=${waited}" 0
        fi
        sleep 1
        waited=$(( waited + 1 ))
    done

    blitz_log "${STEP}" "route-check" "failure" "target_ip=${target_ip} interface=${expected_interface:-auto} timeout_sec=${timeout_sec}" 1
    return 1
}
|
||||||
|
|
||||||
|
# ---- Main flow ---------------------------------------------------------------
# Load configuration and check prerequisites using helpers sourced from
# common.sh (their exact failure behavior is defined there, not in this file).
blitz_load_boot_env
blitz_require_root "${STEP}"
blitz_require_command ip "${STEP}"
blitz_require_command python3 "${STEP}"
blitz_require_file "${BLITZ_5G_DIAL_DIR}/rndis_dial.py" "${STEP}"

# BLITZ_TIME_SERVER_IP is the target of every route check below, so an empty
# value is fatal.
if [[ -z "${BLITZ_TIME_SERVER_IP}" ]]; then
    blitz_log "${STEP}" "precheck" "failure" "BLITZ_TIME_SERVER_IP is empty and no fallback could be derived" 1
    exit 1
fi

# Optionally take down the interfaces listed in BLITZ_5G_DISABLE_INTERFACES
# before dialing.
disable_interfaces "${BLITZ_5G_DISABLE_INTERFACES:-}"

# Shortcut: with an explicitly configured interface, skip dialing entirely when
# the route to the target is already reported as ready on it.
if [[ -n "${BLITZ_5G_INTERFACE:-}" ]]; then
    route_output="$(blitz_route_ready "${BLITZ_TIME_SERVER_IP}" "${BLITZ_5G_INTERFACE}" || true)"
    if [[ -n "${route_output}" ]]; then
        blitz_log "${STEP}" "dial" "already_up" "target_ip=${BLITZ_TIME_SERVER_IP} interface=${BLITZ_5G_INTERFACE} route=${route_output}" 0
        exit 0
    fi
else
    blitz_log "${STEP}" "route-check" "info" "BLITZ_5G_INTERFACE is empty, skipping pre-dial route shortcut and using auto-detect mode" 0
fi

# Under "set -e" a serial-port timeout (non-zero return) ends the script here.
wait_for_serial "${BLITZ_5G_SERIAL_PORT}" "${BLITZ_5G_SERIAL_WAIT_SEC}"

# Assemble the rndis_dial.py command line; optional flags depend on the
# configuration variables checked below.
dial_cmd=(
    python3
    rndis_dial.py
    --serial-port "${BLITZ_5G_SERIAL_PORT}"
    --modem-subnet "${BLITZ_5G_MODEM_SUBNET}"
)
if [[ -n "${BLITZ_5G_INTERFACE:-}" ]]; then
    dial_cmd+=(--interface "${BLITZ_5G_INTERFACE}")
fi
# DHCP is skipped only when explicitly requested (defaults to "0").
case "${BLITZ_5G_SKIP_DHCP:-0}" in
    1|true|TRUE|yes|YES)
        dial_cmd+=(--skip-dhcp)
        ;;
esac
# Default-route removal is on unless disabled (defaults to "1"). When enabled,
# the gateway, the primary target and any extra targets from
# BLITZ_5G_ROUTE_TARGETS are passed along as well.
case "${BLITZ_5G_REMOVE_DEFAULT_ROUTE:-1}" in
    1|true|TRUE|yes|YES)
        dial_cmd+=(--remove-default-route --gateway "${BLITZ_5G_GATEWAY}" --route-target "${BLITZ_TIME_SERVER_IP}")
        append_route_targets "${BLITZ_5G_ROUTE_TARGETS:-}"
        ;;
esac

# rndis_dial.py is invoked by relative name, so run it from its own directory.
pushd "${BLITZ_5G_DIAL_DIR}" >/dev/null
blitz_run "${STEP}" "dial" "${dial_cmd[@]}"
popd >/dev/null

# Determine which interface to verify: the configured one, or else the value
# read from BLITZ_5G_INFO_JSON (presumably written by rndis_dial.py; confirm
# against that script).
resolved_interface="${BLITZ_5G_INTERFACE:-}"
if [[ -z "${resolved_interface}" ]]; then
    resolved_interface="$(read_detected_interface "${BLITZ_5G_INFO_JSON}" || true)"
    if [[ -n "${resolved_interface}" ]]; then
        blitz_log "${STEP}" "resolve-interface" "success" "resolved interface from ${BLITZ_5G_INFO_JSON}: ${resolved_interface}" 0
    else
        blitz_log "${STEP}" "resolve-interface" "failure" "failed to read detected interface from ${BLITZ_5G_INFO_JSON}" 1
    fi
fi

# With a known interface, wait for the route; under "set -e" a timeout ends the
# script before the completion line is logged. Without one, the route wait is
# skipped and the step is still reported as complete.
if [[ -n "${resolved_interface}" ]]; then
    wait_for_route "${BLITZ_TIME_SERVER_IP}" "${BLITZ_5G_ROUTE_WAIT_SEC}" "${resolved_interface}"
    blitz_log "${STEP}" "complete" "success" "5G dial completed and route is ready on ${resolved_interface}" 0
else
    blitz_log "${STEP}" "complete" "success" "5G dial completed but route wait was skipped because no interface could be resolved; refer to rndis_dial.py logs" 0
fi
|
||||||
219
scripts/boot/README.md
Normal file
219
scripts/boot/README.md
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
# Robot B-Side Boot Chain
|
||||||
|
|
||||||
|
This directory contains the robot-side boot and recovery scripts.
|
||||||
|
|
||||||
|
Normal usage is:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo bash scripts/boot/install-systemd.sh
|
||||||
|
sudo systemctl start blitz-robot.target
|
||||||
|
```
|
||||||
|
|
||||||
|
After installation, `blitz-robot.target` is enabled and will start automatically on reboot.
|
||||||
|
|
||||||
|
To stop the chain now and disable boot-time autostart for future reboots:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo bash scripts/boot/disable-systemd.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Current Startup Order
|
||||||
|
|
||||||
|
The current cold-start chain is:
|
||||||
|
|
||||||
|
1. `blitz-boot-gate.service`
|
||||||
|
2. `blitz-5g-dial.service`
|
||||||
|
3. `blitz-ros-receiver.service`
|
||||||
|
4. `blitz-b-side-omnid.service`
|
||||||
|
5. `blitz-watchdog.service`
|
||||||
|
|
||||||
|
There is no longer any automatic time-sync step in the boot chain.
|
||||||
|
|
||||||
|
## What Each Script Does
|
||||||
|
|
||||||
|
- `robot-boot.env`: default boot configuration
|
||||||
|
- `robot-boot.env.local`: machine-local overrides
|
||||||
|
- `common.sh`: shared env loading, logging, and helper functions
|
||||||
|
- `boot-gate.sh`: fixed startup delay gate
|
||||||
|
- `5g-dial.sh`: brings up the 5G modem path and verifies routing
|
||||||
|
- `start-ros-receiver-service.sh`: boot wrapper for ROS receiver
|
||||||
|
- `wait-for-unix-socket.sh`: waits for the ROS receiver unix socket
|
||||||
|
- `start-b-side-omnid-service.sh`: boot wrapper for `b_side_omnid`
|
||||||
|
- `blitz-watchdog.sh`: runtime health watchdog and recovery orchestrator
|
||||||
|
- `blitz-fault-inject.sh`: fault injection entrypoint
|
||||||
|
- `install-systemd.sh`: installs systemd units into `/etc/systemd/system`
|
||||||
|
- `disable-systemd.sh`: stops the boot chain and disables autostart
|
||||||
|
|
||||||
|
## Important Configuration
|
||||||
|
|
||||||
|
Most machine-specific overrides should go into:
|
||||||
|
|
||||||
|
```text
|
||||||
|
scripts/boot/robot-boot.env.local
|
||||||
|
```
|
||||||
|
|
||||||
|
Typical settings:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
BLITZ_BOOT_DELAY_SEC="30"
|
||||||
|
BLITZ_LOG_FILE="/var/log/blitz-robot/startup.log"
|
||||||
|
BLITZ_RUNTIME_DIR="/run/blitz-robot"
|
||||||
|
|
||||||
|
BLITZ_5G_DIAL_DIR="${OMNISOCKETGO_ROOT}/scripts/boot"
|
||||||
|
BLITZ_5G_SERIAL_PORT="/dev/ttyUSB2"
|
||||||
|
BLITZ_5G_INTERFACE=""
|
||||||
|
BLITZ_5G_MODEM_SUBNET="192.168.224.0/22"
|
||||||
|
BLITZ_5G_GATEWAY="192.168.225.1"
|
||||||
|
BLITZ_5G_REMOVE_DEFAULT_ROUTE="1"
|
||||||
|
BLITZ_5G_ROUTE_TARGETS="106.55.173.235"
|
||||||
|
BLITZ_5G_INFO_JSON="${OMNISOCKETGO_ROOT}/scripts/boot/modem_network_info.json"
|
||||||
|
|
||||||
|
BLITZ_TIME_SERVER_IP="81.70.156.140"
|
||||||
|
|
||||||
|
BLITZ_ROS_USER="nvidia"
|
||||||
|
BLITZ_ROS_SOCKET_WAIT_SEC="20"
|
||||||
|
BLITZ_WATCHDOG_INTERVAL_SEC="5"
|
||||||
|
BLITZ_HEALTH_STALE_SEC="15"
|
||||||
|
BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="15"
|
||||||
|
BLITZ_NETWORK_FAIL_THRESHOLD="3"
|
||||||
|
BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="30"
|
||||||
|
BLITZ_GPS_MONITOR_ENABLED="1"
|
||||||
|
BLITZ_GPS_DEVICE_GLOB="/dev/ttyCH341USB*"
|
||||||
|
BLITZ_GPS_CHECK_INTERVAL_SEC="10"
|
||||||
|
BLITZ_GPS_RESTART_UNITS="gpsd.socket gpsd.service"
|
||||||
|
BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION="0"
|
||||||
|
```
|
||||||
|
|
||||||
|
`BLITZ_TIME_SERVER_IP` is still used, but only as the 5G route/ping health-check target. It is no longer used for automatic clock synchronization.
|
||||||
|
|
||||||
|
If `BLITZ_TIME_SERVER_IP` is left empty, the scripts fall back to the host part of `ROBOT_SIDE_OMNISOCKET_SERVER_ADDR`.
|
||||||
|
|
||||||
|
## Install Or Upgrade
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo bash scripts/boot/install-systemd.sh
|
||||||
|
sudo systemctl daemon-reload
|
||||||
|
sudo systemctl restart blitz-robot.target
|
||||||
|
```
|
||||||
|
|
||||||
|
`install-systemd.sh` will also remove any old `blitz-time-sync.service` unit left over from earlier versions.
|
||||||
|
|
||||||
|
## Disable Autostart
|
||||||
|
|
||||||
|
To stop the currently running services and disable autostart for future reboots:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo bash scripts/boot/disable-systemd.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
To re-enable later:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo bash scripts/boot/install-systemd.sh
|
||||||
|
sudo systemctl start blitz-robot.target
|
||||||
|
```
|
||||||
|
|
||||||
|
## Logs
|
||||||
|
|
||||||
|
All boot-chain and watchdog logs are appended to:
|
||||||
|
|
||||||
|
```text
|
||||||
|
/var/log/blitz-robot/startup.log
|
||||||
|
```
|
||||||
|
|
||||||
|
Follow the log live:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo tail -f /var/log/blitz-robot/startup.log
|
||||||
|
```
|
||||||
|
|
||||||
|
Check service state:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo systemctl status blitz-robot.target
|
||||||
|
sudo systemctl status blitz-5g-dial.service
|
||||||
|
sudo systemctl status blitz-ros-receiver.service
|
||||||
|
sudo systemctl status blitz-b-side-omnid.service
|
||||||
|
sudo systemctl status blitz-watchdog.service
|
||||||
|
```
|
||||||
|
|
||||||
|
Check systemd journal:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo journalctl -u blitz-robot.target -u blitz-5g-dial.service \
|
||||||
|
-u blitz-ros-receiver.service -u blitz-b-side-omnid.service \
|
||||||
|
-u blitz-watchdog.service -f
|
||||||
|
```
|
||||||
|
|
||||||
|
## Runtime Status Files
|
||||||
|
|
||||||
|
The runtime status directory is:
|
||||||
|
|
||||||
|
```text
|
||||||
|
/run/blitz-robot
|
||||||
|
```
|
||||||
|
|
||||||
|
Key files:
|
||||||
|
|
||||||
|
- `b-side-omnid.status.json`
|
||||||
|
- `ros-receiver.status.json`
|
||||||
|
- `watchdog.status.json`
|
||||||
|
|
||||||
|
`watchdog.status.json` now also records `gps_ok` and `gps_device_present` so you can quickly tell whether the GPS USB serial node is currently visible and whether the last `gpsd` reconnect attempt succeeded.
|
||||||
|
|
||||||
|
Pretty-print them:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo python3 -m json.tool /run/blitz-robot/watchdog.status.json
|
||||||
|
sudo python3 -m json.tool /run/blitz-robot/b-side-omnid.status.json
|
||||||
|
sudo python3 -m json.tool /run/blitz-robot/ros-receiver.status.json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Fault Injection
|
||||||
|
|
||||||
|
Available test commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo bash scripts/boot/blitz-fault-inject.sh bside-crash
|
||||||
|
sudo bash scripts/boot/blitz-fault-inject.sh bside-process-freeze
|
||||||
|
sudo bash scripts/boot/blitz-fault-inject.sh bside-video-thread-stall
|
||||||
|
sudo bash scripts/boot/blitz-fault-inject.sh bside-control-thread-stall
|
||||||
|
sudo bash scripts/boot/blitz-fault-inject.sh ros-crash
|
||||||
|
sudo bash scripts/boot/blitz-fault-inject.sh ros-freeze
|
||||||
|
```
|
||||||
|
|
||||||
|
For synthetic network fault injection, first enable it in `robot-boot.env.local`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION="1"
|
||||||
|
```
|
||||||
|
|
||||||
|
Then restart watchdog and inject:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart blitz-watchdog.service
|
||||||
|
sudo bash scripts/boot/blitz-fault-inject.sh network-down on
|
||||||
|
sudo bash scripts/boot/blitz-fault-inject.sh network-down off
|
||||||
|
```
|
||||||
|
|
||||||
|
## Recovery Behavior Summary
|
||||||
|
|
||||||
|
- If `b_side_omnid` dies or its status file goes stale, watchdog first tries a targeted `b_side` restart.
|
||||||
|
- If ROS receiver dies, loses its socket, or its heartbeat goes stale, watchdog performs an ordered full restart:
  - stop `b_side`
  - restart ROS receiver
  - wait for unix socket
  - start `b_side`
|
||||||
|
- If network checks fail repeatedly, watchdog stops `b_side`, runs `5g-dial.sh`, waits for route recovery, and then restores services.
|
||||||
|
- While 5G is healthy, the watchdog keeps a host route for every target listed in `BLITZ_TIME_SERVER_IP` and `BLITZ_5G_ROUTE_TARGETS` pinned to the resolved 5G interface. When 5G becomes unhealthy, the watchdog deletes those host routes so traffic can fall back to the remaining default network path. If that fallback path is still reachable, the watchdog keeps `b_side_omnid` running instead of treating the situation as a full network outage.
|
||||||
|
- Whenever watchdog changes or restores those host routes, it logs `route-path` lines for each target so you can see which interface Linux currently chooses for `81.70.156.140`, `106.55.173.235`, and any other configured 5G-pinned target.
|
||||||
|
- If GPS monitoring is enabled, watchdog checks `BLITZ_GPS_DEVICE_GLOB` every `BLITZ_GPS_CHECK_INTERVAL_SEC` seconds. When the GPS serial device disappears and later reappears, watchdog restarts the units in `BLITZ_GPS_RESTART_UNITS` so `gpsd` can bind to the new device node again.
|
||||||
|
- Camera disappearance is logged as degraded state. Reappearance triggers a `b_side` restart after the device is stable.
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- `time-sync.sh` and `blitz-time-sync.service` are intentionally removed from the automatic boot path.
|
||||||
|
- `b_side_omnid` must already be built before boot-time startup.
|
||||||
|
- `bin/b_side_omnid` missing, ROS env missing, or modem script missing will all show up in `startup.log`.
|
||||||
137
scripts/boot/blitz-5g-link-logger.sh
Normal file
137
scripts/boot/blitz-5g-link-logger.sh
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="5g-link-logger"
|
||||||
|
|
||||||
|
# Print the IP address to use as the probe target.
# Prefers BLITZ_TIME_SERVER_IP; otherwise falls back to the first entry of the
# comma- and/or whitespace-separated BLITZ_5G_ROUTE_TARGETS list.
# Returns 0 after printing a target, 1 when neither variable yields one.
resolve_target_ip() {
    local candidate
    # Default to empty so an unset variable is not fatal under "set -u",
    # matching how BLITZ_TIME_SERVER_IP is read below.
    local route_targets="${BLITZ_5G_ROUTE_TARGETS:-}"

    if [[ -n "${BLITZ_TIME_SERVER_IP:-}" ]]; then
        printf '%s\n' "${BLITZ_TIME_SERVER_IP}"
        return 0
    fi

    for candidate in ${route_targets//,/ }; do
        if [[ -n "${candidate}" ]]; then
            printf '%s\n' "${candidate}"
            return 0
        fi
    done
    return 1
}
|
||||||
|
|
||||||
|
# Emit one link-quality sample as a single compact JSON line on stdout.
#
# Arguments:
#   $1  interface name (may be empty: link counters are skipped and the ping
#       is not bound to an interface)
#   $2  probe target IP (may be empty: the route lookup and ping are skipped)
#
# Every key is always present in the output; values that could not be measured
# keep their defaults (false / 0 / "" / null).
emit_sample_json() {
  local interface_name="${1:-}"
  local target_ip="${2:-}"

  python3 - "${interface_name}" "${target_ip}" <<'PY'
import json
import subprocess
import sys
import time

interface_name = sys.argv[1]
target_ip = sys.argv[2]

payload = {
    "ts_unix_ms": time.time_ns() // 1_000_000,
    "interface": interface_name,
    "target_ip": target_ip,
    "link_present": False,
    "route_output": "",
    "route_ok": False,
    "probe_ok": False,
    "ping_rtt_ms": None,
    "rx_bytes": 0,
    "tx_bytes": 0,
    "rx_packets": 0,
    "tx_packets": 0,
    "rx_errors": 0,
    "tx_errors": 0,
    "rx_drops": 0,
    "tx_drops": 0,
}

if interface_name:
    try:
        output = subprocess.check_output(
            ["ip", "-j", "-s", "link", "show", "dev", interface_name],
            text=True,
            stderr=subprocess.DEVNULL,
        )
        stats = json.loads(output)
        if stats:
            item = stats[0]
            payload["link_present"] = True
            # Prefer the 64-bit counters; fall back to "stats" when absent.
            rx = item.get("stats64", {}).get("rx", {})
            tx = item.get("stats64", {}).get("tx", {})
            if not rx and not tx:
                rx = item.get("stats", {}).get("rx", {})
                tx = item.get("stats", {}).get("tx", {})
            payload["rx_bytes"] = int(rx.get("bytes") or 0)
            payload["tx_bytes"] = int(tx.get("bytes") or 0)
            payload["rx_packets"] = int(rx.get("packets") or 0)
            payload["tx_packets"] = int(tx.get("packets") or 0)
            payload["rx_errors"] = int(rx.get("errors") or 0)
            payload["tx_errors"] = int(tx.get("errors") or 0)
            payload["rx_drops"] = int(rx.get("dropped") or 0)
            payload["tx_drops"] = int(tx.get("dropped") or 0)
    except Exception:
        # Deliberate best effort: a missing interface or unparsable output
        # leaves link_present False and the counters at 0.
        pass

if target_ip:
    try:
        route = subprocess.check_output(
            ["ip", "route", "get", target_ip],
            text=True,
            stderr=subprocess.STDOUT,
        ).strip()
        payload["route_output"] = route.splitlines()[0] if route else ""
        # The route is OK when one exists and, if an interface was given,
        # it leaves through that interface.
        payload["route_ok"] = bool(payload["route_output"]) and (
            not interface_name or f" dev {interface_name}" in payload["route_output"]
        )
    except Exception as exc:
        payload["route_output"] = str(exc)

    ping_cmd = ["ping", "-c", "1", "-W", "2", target_ip]
    if interface_name:
        ping_cmd[1:1] = ["-I", interface_name]
    try:
        ping = subprocess.run(ping_cmd, capture_output=True, text=True)
    except OSError:
        # `ping` is missing or not executable. Record a failed probe instead
        # of crashing, so the caller still receives a JSON line.
        ping = None
    if ping is not None:
        payload["probe_ok"] = ping.returncode == 0
        output = (ping.stdout or "") + "\n" + (ping.stderr or "")
        for token in output.replace("\n", " ").split():
            if token.startswith("time="):
                # Handles both "time=12.3 ms" and "time=12.3ms".
                value = token.split("=", 1)[1].rstrip("ms")
                try:
                    payload["ping_rtt_ms"] = float(value)
                except ValueError:
                    pass
                break

print(json.dumps(payload, separators=(",", ":"), ensure_ascii=False))
PY
}
|
||||||
|
|
||||||
|
if [[ "${OMNI_BOOT_MODE:-0}" == "1" ]]; then
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_require_run_context
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ -z "${BLITZ_RUN_DIR:-}" && -f "${BLITZ_RUN_CONTEXT_FILE:-}" ]]; then
|
||||||
|
blitz_load_run_context_env || true
|
||||||
|
fi
|
||||||
|
blitz_ensure_instance_id
|
||||||
|
|
||||||
|
export BLITZ_5G_LINK_LOG_PATH="${BLITZ_5G_LINK_LOG_PATH:-${BLITZ_RUN_DIR}/b-5g-link-quality.${BLITZ_INSTANCE_ID}.jsonl}"
|
||||||
|
target_ip="$(resolve_target_ip || true)"
|
||||||
|
|
||||||
|
blitz_log "${STEP}" "start" "start" "path=${BLITZ_5G_LINK_LOG_PATH} interval_sec=${BLITZ_5G_LINK_LOG_INTERVAL_SEC}" 0
|
||||||
|
|
||||||
|
while true; do
|
||||||
|
interface_name="$(blitz_resolve_5g_interface || true)"
|
||||||
|
line="$(emit_sample_json "${interface_name}" "${target_ip}")"
|
||||||
|
blitz_jsonl_append_line "${BLITZ_5G_LINK_LOG_PATH}" "${line}"
|
||||||
|
sleep "${BLITZ_5G_LINK_LOG_INTERVAL_SEC}"
|
||||||
|
done
|
||||||
139
scripts/boot/blitz-fault-inject.sh
Normal file
139
scripts/boot/blitz-fault-inject.sh
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="fault-inject"
|
||||||
|
B_SIDE_SERVICE="blitz-b-side-omnid.service"
|
||||||
|
ROS_SERVICE="blitz-ros-receiver.service"
|
||||||
|
|
||||||
|
# Print the MainPID value that `systemctl show` reports for the unit in $1.
main_pid_for_service() {
  local unit="$1"

  systemctl show --property=MainPID --value "${unit}"
}
|
||||||
|
|
||||||
|
# Poll once per second until the unit's MainPID is non-empty, non-zero and
# different from the previous PID.
#
# Arguments:
#   $1  unit name
#   $2  PID the unit had before the restart was provoked
#   $3  maximum number of polls/seconds (default 10)
#
# Prints the new PID and returns 0 on success; returns 1 when the limit is
# reached without observing a change.
wait_for_service_pid_change() {
  local unit="$1"
  local old_pid="$2"
  local limit_sec="${3:-10}"
  local elapsed
  local seen_pid=""

  for (( elapsed = 0; elapsed < limit_sec; elapsed++ )); do
    seen_pid="$(main_pid_for_service "${unit}")"
    case "${seen_pid}" in
      ""|"0"|"${old_pid}")
        # Not restarted yet; keep waiting.
        ;;
      *)
        printf '%s\n' "${seen_pid}"
        return 0
        ;;
    esac
    sleep 1
  done

  return 1
}
|
||||||
|
|
||||||
|
# Print the MainPID of the unit in $1. When the PID is empty or 0, log a
# "lookup-pid" failure and exit with status 1 instead.
require_running_pid() {
  local unit="$1"
  local found_pid

  found_pid="$(main_pid_for_service "${unit}")"
  case "${found_pid}" in
    ""|"0")
      blitz_log "${STEP}" "lookup-pid" "failure" "service=${unit}" 1
      exit 1
      ;;
  esac
  printf '%s\n' "${found_pid}"
}
|
||||||
|
|
||||||
|
# Create (or overwrite) the flag file named $1 inside BLITZ_RUNTIME_DIR,
# storing the current epoch seconds, then log a "flag-on" success record.
write_fault_flag() {
  local flag_file="${BLITZ_RUNTIME_DIR}/$1"

  printf '%s\n' "$(date +%s)" > "${flag_file}"
  blitz_log "${STEP}" "flag-on" "success" "path=${flag_file}" 0
}
|
||||||
|
|
||||||
|
# Remove the flag file named $1 from BLITZ_RUNTIME_DIR (no error when it is
# already absent), then log a "flag-off" success record.
clear_fault_flag() {
  local flag_file="${BLITZ_RUNTIME_DIR}/$1"

  rm -f "${flag_file}"
  blitz_log "${STEP}" "flag-off" "success" "path=${flag_file}" 0
}
|
||||||
|
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_require_root "${STEP}"
|
||||||
|
blitz_prepare_runtime_dir
|
||||||
|
|
||||||
|
case "${1:-}" in
|
||||||
|
bside-crash)
|
||||||
|
target_pid="$(require_running_pid "${B_SIDE_SERVICE}")"
|
||||||
|
blitz_log "${STEP}" "bside-crash" "start" "service=${B_SIDE_SERVICE} pid=${target_pid}" 0
|
||||||
|
kill -9 "${target_pid}"
|
||||||
|
if restarted_pid="$(wait_for_service_pid_change "${B_SIDE_SERVICE}" "${target_pid}")"; then
|
||||||
|
blitz_log "${STEP}" "bside-crash" "success" "old_pid=${target_pid} new_pid=${restarted_pid}" 0
|
||||||
|
else
|
||||||
|
blitz_log "${STEP}" "bside-crash" "failure" "old_pid=${target_pid} restart_not_observed_within=10s" 1
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
bside-process-freeze)
|
||||||
|
target_pid="$(require_running_pid "${B_SIDE_SERVICE}")"
|
||||||
|
blitz_log "${STEP}" "bside-process-freeze" "start" "service=${B_SIDE_SERVICE} pid=${target_pid}" 0
|
||||||
|
kill -STOP "${target_pid}"
|
||||||
|
blitz_log "${STEP}" "bside-process-freeze" "success" "service=${B_SIDE_SERVICE} pid=${target_pid}" 0
|
||||||
|
;;
|
||||||
|
bside-video-thread-stall)
|
||||||
|
write_fault_flag "fault-injection-bside-video-thread-stall"
|
||||||
|
;;
|
||||||
|
bside-control-thread-stall)
|
||||||
|
write_fault_flag "fault-injection-bside-control-thread-stall"
|
||||||
|
;;
|
||||||
|
ros-crash)
|
||||||
|
target_pid="$(require_running_pid "${ROS_SERVICE}")"
|
||||||
|
blitz_log "${STEP}" "ros-crash" "start" "service=${ROS_SERVICE} pid=${target_pid}" 0
|
||||||
|
kill -9 "${target_pid}"
|
||||||
|
if restarted_pid="$(wait_for_service_pid_change "${ROS_SERVICE}" "${target_pid}")"; then
|
||||||
|
blitz_log "${STEP}" "ros-crash" "success" "old_pid=${target_pid} new_pid=${restarted_pid}" 0
|
||||||
|
else
|
||||||
|
blitz_log "${STEP}" "ros-crash" "failure" "old_pid=${target_pid} restart_not_observed_within=10s" 1
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
ros-freeze)
|
||||||
|
target_pid="$(require_running_pid "${ROS_SERVICE}")"
|
||||||
|
blitz_log "${STEP}" "ros-freeze" "start" "service=${ROS_SERVICE} pid=${target_pid}" 0
|
||||||
|
kill -STOP "${target_pid}"
|
||||||
|
blitz_log "${STEP}" "ros-freeze" "success" "service=${ROS_SERVICE} pid=${target_pid}" 0
|
||||||
|
;;
|
||||||
|
network-down)
|
||||||
|
if [[ "${BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION}" != "1" ]]; then
|
||||||
|
blitz_log "${STEP}" "network-down" "failure" "set BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION=1 first" 1
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
case "${2:-}" in
|
||||||
|
on)
|
||||||
|
write_fault_flag "fault-injection-network-down"
|
||||||
|
;;
|
||||||
|
off)
|
||||||
|
clear_fault_flag "fault-injection-network-down"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "usage: $0 network-down on|off" >&2
|
||||||
|
exit 2
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
cat <<'EOF'
|
||||||
|
usage:
|
||||||
|
blitz-fault-inject.sh bside-crash
|
||||||
|
blitz-fault-inject.sh bside-process-freeze
|
||||||
|
blitz-fault-inject.sh bside-video-thread-stall
|
||||||
|
blitz-fault-inject.sh bside-control-thread-stall
|
||||||
|
blitz-fault-inject.sh ros-crash
|
||||||
|
blitz-fault-inject.sh ros-freeze
|
||||||
|
blitz-fault-inject.sh network-down on|off
|
||||||
|
EOF
|
||||||
|
exit 2
|
||||||
|
;;
|
||||||
|
esac
|
||||||
50
scripts/boot/blitz-incident-capture-launch.sh
Normal file
50
scripts/boot/blitz-incident-capture-launch.sh
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="incident-launch"
|
||||||
|
incident_id=""
|
||||||
|
args=()
|
||||||
|
timeout_bin=""
|
||||||
|
|
||||||
|
while (($# > 0)); do
|
||||||
|
case "$1" in
|
||||||
|
--incident-id)
|
||||||
|
incident_id="${2:-}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
args+=("$1")
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_require_root "${STEP}"
|
||||||
|
blitz_require_command systemd-run "${STEP}"
|
||||||
|
blitz_require_command timeout "${STEP}"
|
||||||
|
timeout_bin="$(command -v timeout)"
|
||||||
|
|
||||||
|
if [[ -z "${incident_id}" ]]; then
|
||||||
|
incident_id="$(blitz_new_incident_id)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
unit_name="blitz-incident-${incident_id//[^A-Za-z0-9_.-]/-}"
|
||||||
|
|
||||||
|
systemd-run \
|
||||||
|
--quiet \
|
||||||
|
--collect \
|
||||||
|
--unit "${unit_name}" \
|
||||||
|
--property=Type=oneshot \
|
||||||
|
--property="StandardOutput=append:${BLITZ_LOG_FILE}" \
|
||||||
|
--property="StandardError=append:${BLITZ_LOG_FILE}" \
|
||||||
|
"${timeout_bin}" "${BLITZ_INCIDENT_TOTAL_TIMEOUT_SEC}s" \
|
||||||
|
/bin/bash "${SCRIPT_DIR}/blitz-incident-capture.sh" \
|
||||||
|
--incident-id "${incident_id}" \
|
||||||
|
"${args[@]}"
|
||||||
|
|
||||||
|
printf '%s\n' "${incident_id}"
|
||||||
131
scripts/boot/blitz-incident-capture.sh
Normal file
131
scripts/boot/blitz-incident-capture.sh
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="incident-capture"
|
||||||
|
incident_id=""
|
||||||
|
incident_source=""
|
||||||
|
incident_reason=""
|
||||||
|
incident_unit=""
|
||||||
|
incident_result=""
|
||||||
|
incident_exit_status=""
|
||||||
|
|
||||||
|
# Run a diagnostic command and store its combined stdout/stderr in a file.
#
# Arguments:
#   $1    output file
#   $2..  command and its arguments
#
# When `timeout` is available the command is limited to
# BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC seconds. Failures of the command are
# ignored, so the function always succeeds.
run_capture() {
  local dest="$1"
  shift

  if ! command -v timeout >/dev/null 2>&1; then
    "$@" &> "${dest}" || true
  else
    timeout "${BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC}s" "$@" &> "${dest}" || true
  fi
}
|
||||||
|
|
||||||
|
while (($# > 0)); do
|
||||||
|
case "$1" in
|
||||||
|
--incident-id)
|
||||||
|
incident_id="${2:-}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--source)
|
||||||
|
incident_source="${2:-}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--reason)
|
||||||
|
incident_reason="${2:-}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--unit)
|
||||||
|
incident_unit="${2:-}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--result)
|
||||||
|
incident_result="${2:-}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--exit-status)
|
||||||
|
incident_exit_status="${2:-}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
blitz_log "${STEP}" "parse-arg" "failure" "unknown argument: $1" 2
|
||||||
|
exit 2
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
if [[ -n "${incident_result}" && "${incident_result}" == "success" ]]; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_load_run_context_env || true
|
||||||
|
blitz_prepare_runtime_dir
|
||||||
|
blitz_prepare_run_root
|
||||||
|
|
||||||
|
if [[ -z "${incident_id}" ]]; then
|
||||||
|
incident_id="$(blitz_new_incident_id)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
incident_dir="${BLITZ_RUN_ROOT}/incidents/${incident_id}"
|
||||||
|
mkdir -p "${incident_dir}"
|
||||||
|
|
||||||
|
# Write the incident metadata file. The ten values after the script name are,
# in order: output path, incident id, run id, source, reason, unit, service
# result, exit status, run directory and hostname.
python3 - "${incident_dir}/incident.json" "${incident_id}" "${BLITZ_RUN_ID:-}" "${incident_source}" "${incident_reason}" "${incident_unit}" "${incident_result}" "${incident_exit_status}" "${BLITZ_RUN_DIR:-}" "${HOSTNAME:-$(hostname)}" <<'PY'
import json
import sys
import time

# Ten arguments are passed, so the slice has to end at index 11 (exclusive).
# A slice ending at 10 yields only nine values and the unpacking raises
# ValueError before anything is written.
(
    path,
    incident_id,
    run_id,
    source,
    reason,
    unit,
    result,
    exit_status,
    run_dir,
    hostname,
) = sys.argv[1:11]

payload = {
    "incident_id": incident_id,
    "run_id": run_id,
    "source": source,
    "fault_reason": reason,
    "unit": unit,
    "service_result": result,
    "exit_status": exit_status,
    "run_dir": run_dir,
    "hostname": hostname,
    "captured_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
}

with open(path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, ensure_ascii=False, indent=2, sort_keys=True)
PY
|
||||||
|
|
||||||
|
for status_file in \
|
||||||
|
"${BLITZ_RUNTIME_DIR}/watchdog.status.json" \
|
||||||
|
"${BLITZ_RUNTIME_DIR}/b-side-omnid.status.json" \
|
||||||
|
"${BLITZ_RUNTIME_DIR}/ros-receiver.status.json"
|
||||||
|
do
|
||||||
|
if [[ -f "${status_file}" ]]; then
|
||||||
|
cp -f "${status_file}" "${incident_dir}/$(basename "${status_file}")"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [[ -f "${BLITZ_LOG_FILE}" ]]; then
|
||||||
|
tail -n 400 "${BLITZ_LOG_FILE}" > "${incident_dir}/startup.log.tail"
|
||||||
|
fi
|
||||||
|
|
||||||
|
run_capture "${incident_dir}/systemctl-status.txt" \
|
||||||
|
systemctl status blitz-robot.target blitz-run-context.service blitz-5g-dial.service blitz-5g-link-logger.service blitz-ros-receiver.service blitz-b-side-omnid.service blitz-watchdog.service
|
||||||
|
run_capture "${incident_dir}/journal.txt" \
|
||||||
|
journalctl --no-pager --since "5 minutes ago" -u blitz-run-context.service -u blitz-5g-dial.service -u blitz-5g-link-logger.service -u blitz-ros-receiver.service -u blitz-b-side-omnid.service -u blitz-watchdog.service
|
||||||
|
run_capture "${incident_dir}/ip-addr.txt" ip addr
|
||||||
|
run_capture "${incident_dir}/ip-route.txt" ip route
|
||||||
|
run_capture "${incident_dir}/ss-uapn.txt" ss -uapn
|
||||||
|
run_capture "${incident_dir}/ss-xlp.txt" ss -xlp
|
||||||
|
|
||||||
|
if [[ -f "${BLITZ_5G_INFO_JSON:-}" ]]; then
|
||||||
|
cp -f "${BLITZ_5G_INFO_JSON}" "${incident_dir}/$(basename "${BLITZ_5G_INFO_JSON}")"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ -n "${BLITZ_RUN_DIR:-}" && -d "${BLITZ_RUN_DIR}" ]]; then
|
||||||
|
while IFS= read -r -d '' jsonl; do
|
||||||
|
tail -n 200 "${jsonl}" > "${incident_dir}/tail-$(basename "${jsonl}")"
|
||||||
|
done < <(find "${BLITZ_RUN_DIR}" -maxdepth 1 -type f -name '*.jsonl' -print0 2>/dev/null)
|
||||||
|
fi
|
||||||
|
|
||||||
|
blitz_log "${STEP}" "complete" "success" "incident_id=${incident_id} path=${incident_dir}" 0
|
||||||
22
scripts/boot/blitz-run-context.sh
Normal file
22
scripts/boot/blitz-run-context.sh
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="run-context"
|
||||||
|
|
||||||
|
# ERR-trap handler: log the failing line number ($1, "unknown" when omitted)
# together with BASH_COMMAND, then exit with the status of the command that
# failed.
on_error() {
  # Must be the first statement so that $? still holds the failing status.
  local exit_code="$?"
  local failed_line="${1:-unknown}"

  blitz_log "${STEP}" "error" "failure" "line=${failed_line} cmd=${BASH_COMMAND:-unknown}" "${exit_code}"
  exit "${exit_code}"
}
|
||||||
|
|
||||||
|
trap 'on_error "${LINENO}"' ERR
|
||||||
|
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_require_root "${STEP}"
|
||||||
|
blitz_require_command python3 "${STEP}"
|
||||||
|
blitz_init_run_context
|
||||||
|
blitz_log "${STEP}" "complete" "success" "run_id=${BLITZ_RUN_ID} run_dir=${BLITZ_RUN_DIR}" 0
|
||||||
971
scripts/boot/blitz-watchdog.sh
Normal file
971
scripts/boot/blitz-watchdog.sh
Normal file
@@ -0,0 +1,971 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="watchdog"
|
||||||
|
B_SIDE_SERVICE="blitz-b-side-omnid.service"
|
||||||
|
ROS_SERVICE="blitz-ros-receiver.service"
|
||||||
|
B_SIDE_STATUS_FILE=""
|
||||||
|
ROS_STATUS_FILE=""
|
||||||
|
WATCHDOG_STATUS_FILE=""
|
||||||
|
NETWORK_FAULT_FILE=""
|
||||||
|
WATCHDOG_EVENT_LOG=""
|
||||||
|
WATCHDOG_SAMPLE_LOG=""
|
||||||
|
WATCHDOG_EVENT_LOG_FAILURE_REPORTED=0
|
||||||
|
WATCHDOG_SAMPLE_LOG_FAILURE_REPORTED=0
|
||||||
|
CAMERA_MISSING_PREV=0
|
||||||
|
CAMERA_RECOVERY_STABLE_COUNT=0
|
||||||
|
NETWORK_FAIL_COUNT=0
|
||||||
|
NETWORK_COOLDOWN_UNTIL=0
|
||||||
|
BACKOFF_UNTIL=0
|
||||||
|
LAST_ACTION="none"
|
||||||
|
LAST_ACTION_EPOCH_MS=0
|
||||||
|
FULL_RESTART_WINDOW_START=0
|
||||||
|
FULL_RESTART_WINDOW_COUNT=0
|
||||||
|
NETWORK_LAST_INTERFACE=""
|
||||||
|
NETWORK_ROUTE_INTERFACE_LAST_KNOWN=""
|
||||||
|
NETWORK_PRIMARY_LAST_RETRY_SEC=0
|
||||||
|
GPS_LAST_CHECK_SEC=0
|
||||||
|
GPS_DEVICE_PRESENT_PREV=-1
|
||||||
|
GPS_DEVICE_PRESENT_STATE=1
|
||||||
|
GPS_STACK_ACTIVE_STATE=1
|
||||||
|
LAST_REPORTED_FAULT_REASON=""
|
||||||
|
LAST_REPORTED_RECOVERY_STATE=""
|
||||||
|
declare -A TARGETED_RESTART_WINDOW_START=()
|
||||||
|
declare -A TARGETED_RESTART_WINDOW_COUNT=()
|
||||||
|
|
||||||
|
# Print the current time as whole seconds since the Unix epoch.
now_epoch_sec() {
  date '+%s'
}
|
||||||
|
|
||||||
|
# Print the current time as milliseconds since the Unix epoch (epoch seconds
# followed by the first three digits of the nanosecond field).
now_epoch_ms() {
  date '+%s%3N'
}
|
||||||
|
|
||||||
|
# Succeed when `systemctl is-active` reports the unit in $1 as active.
service_is_active() {
  local unit="$1"

  systemctl is-active --quiet "${unit}"
}
|
||||||
|
|
||||||
|
# Succeed only when BLITZ_GPS_MONITOR_ENABLED is exactly "1"; an unset or
# empty value counts as disabled.
gps_monitor_enabled() {
  case "${BLITZ_GPS_MONITOR_ENABLED:-0}" in
    1) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||||
|
|
||||||
|
# Succeed when at least one unit listed in BLITZ_GPS_RESTART_UNITS
# (whitespace-separated) is active. Fails when the list is empty or no listed
# unit is active.
gps_stack_active() {
  local -a gps_units=()
  local gps_unit

  read -r -a gps_units <<< "${BLITZ_GPS_RESTART_UNITS:-}"
  [[ ${#gps_units[@]} -gt 0 ]] || return 1

  for gps_unit in "${gps_units[@]}"; do
    if ! service_is_active "${gps_unit}"; then
      continue
    fi
    return 0
  done
  return 1
}
|
||||||
|
|
||||||
|
# Restart every unit listed in BLITZ_GPS_RESTART_UNITS (whitespace-separated).
#
# Arguments:
#   $1  reason for the restart (used in log records only)
#   $2  device summary (used in log records only)
#
# Side effects: sets GPS_STACK_ACTIVE_STATE to 1 on success and 0 on failure,
# and records "gps-reconnect" via set_last_action before restarting.
#
# Returns 0 on success, 1 when no units are configured, otherwise the exit
# status of `systemctl restart`.
restart_gps_stack() {
  local reason="$1"
  local devices="$2"
  local units=()
  local rc=0

  read -r -a units <<< "${BLITZ_GPS_RESTART_UNITS:-}"
  if (( ${#units[@]} == 0 )); then
    GPS_STACK_ACTIVE_STATE=0
    blitz_log "${STEP}" "gps-reconnect" "failure" "reason=${reason} devices=${devices} units=empty" 1
    return 1
  fi

  set_last_action "gps-reconnect"
  blitz_log "${STEP}" "gps-reconnect" "start" "reason=${reason} devices=${devices} units=${BLITZ_GPS_RESTART_UNITS}" 0

  # Capture the status on the failing side of `||`. Reading $? after an
  # `if ...; fi` whose condition failed always yields 0, which would log the
  # failure with rc=0 and report success to the caller.
  systemctl restart "${units[@]}" || rc=$?

  if (( rc == 0 )); then
    GPS_STACK_ACTIVE_STATE=1
    blitz_log "${STEP}" "gps-reconnect" "success" "reason=${reason} devices=${devices} units=${BLITZ_GPS_RESTART_UNITS}" 0
    return 0
  fi

  GPS_STACK_ACTIVE_STATE=0
  blitz_log "${STEP}" "gps-reconnect" "failure" "reason=${reason} devices=${devices} units=${BLITZ_GPS_RESTART_UNITS}" "${rc}"
  return "${rc}"
}
|
||||||
|
|
||||||
|
# Evaluate GPS health for the watchdog tick at time $1 (epoch seconds).
#
# Reads BLITZ_GPS_CHECK_INTERVAL_SEC (default 10, clamped to at least 1) and
# BLITZ_GPS_DEVICE_GLOB; updates GPS_LAST_CHECK_SEC, GPS_DEVICE_PRESENT_PREV,
# GPS_DEVICE_PRESENT_STATE and GPS_STACK_ACTIVE_STATE.
#
# Behaviour:
#   - Monitoring disabled: both state flags are set to 1, returns 0.
#   - Called again before the interval elapsed: no probing; returns 0 only
#     when both cached state flags are 1.
#   - No device matches the glob: both flags become 0, the "missing" failure
#     is logged once per disappearance, returns 1.
#   - Device present: restarts the GPS units when the device had been missing
#     on the previous check or no GPS unit is active, and returns 0 only if
#     that restart succeeds; otherwise marks the stack active and returns 0.
check_gps_health() {
  local tick_sec="$1"
  local scan_glob="${BLITZ_GPS_DEVICE_GLOB:-}"
  local poll_every_sec="${BLITZ_GPS_CHECK_INTERVAL_SEC:-10}"
  local seen_before="${GPS_DEVICE_PRESENT_PREV}"
  local restart_cause=""
  local node_list=""
  local -a nodes=()

  if ! gps_monitor_enabled; then
    GPS_DEVICE_PRESENT_STATE=1
    GPS_STACK_ACTIVE_STATE=1
    return 0
  fi

  (( poll_every_sec >= 1 )) || poll_every_sec=1

  # Rate limit: between probes, answer from the cached state flags.
  if (( GPS_LAST_CHECK_SEC != 0 && tick_sec - GPS_LAST_CHECK_SEC < poll_every_sec )); then
    (( GPS_DEVICE_PRESENT_STATE == 1 && GPS_STACK_ACTIVE_STATE == 1 )) && return 0
    return 1
  fi
  GPS_LAST_CHECK_SEC="${tick_sec}"

  mapfile -t nodes < <(compgen -G "${scan_glob}" || true)
  if (( ${#nodes[@]} == 0 )); then
    GPS_DEVICE_PRESENT_STATE=0
    GPS_STACK_ACTIVE_STATE=0
    # Log only on the transition to "missing", not on every probe.
    if (( seen_before != 0 )); then
      blitz_log "${STEP}" "gps-device-check" "failure" "state=missing glob=${scan_glob}" 1
    fi
    GPS_DEVICE_PRESENT_PREV=0
    return 1
  fi

  node_list="$(IFS=,; printf '%s' "${nodes[*]}")"
  GPS_DEVICE_PRESENT_STATE=1
  GPS_DEVICE_PRESENT_PREV=1

  if (( seen_before == 0 )); then
    blitz_log "${STEP}" "gps-device-check" "success" "state=reappeared devices=${node_list}" 0
    restart_cause="device-reappeared"
  elif ! gps_stack_active; then
    restart_cause="gpsd-inactive"
  fi

  if [[ -z "${restart_cause}" ]]; then
    GPS_STACK_ACTIVE_STATE=1
    return 0
  fi

  restart_gps_stack "${restart_cause}" "${node_list}" && return 0
  return 1
}
|
||||||
|
|
||||||
|
# Succeed when the regular file $1 exists and its modification time is at most
# $2 seconds old. A file whose mtime cannot be read is treated as modified at
# epoch 0.
status_file_fresh() {
  local file="$1"
  local limit_sec="$2"
  local checked_at
  local modified_at

  [[ -f "${file}" ]] || return 1

  checked_at="$(now_epoch_sec)"
  modified_at="$(stat -c %Y "${file}" 2>/dev/null || echo 0)"
  (( checked_at - modified_at <= limit_sec ))
}
|
||||||
|
|
||||||
|
# Succeed when the JSON status file $1 reports a bound socket and a receive
# thread heartbeat that is at most $2 seconds older than the current time.
#
# Fails when the file cannot be read or parsed, when "socket_bound" is falsy,
# or when "recv_thread_heartbeat_epoch_ms" is missing, zero or negative.
ros_receiver_status_fresh() {
  local status_path="$1"
  local age_limit_sec="$2"
  local stamp_ms

  stamp_ms="$(now_epoch_ms)"
  python3 - "${status_path}" "${stamp_ms}" "${age_limit_sec}" <<'PY'
import json
import sys

status_path = sys.argv[1]
now_ms = int(sys.argv[2])
limit_ms = int(sys.argv[3]) * 1000

try:
    with open(status_path, "r", encoding="utf-8") as fh:
        status = json.load(fh)
except Exception:
    sys.exit(1)

beat_ms = int(status.get("recv_thread_heartbeat_epoch_ms") or 0)
bound = bool(status.get("socket_bound"))

if not bound or beat_ms <= 0:
    sys.exit(1)

age_ms = now_ms - beat_ms
sys.exit(0 if age_ms <= limit_ms else 1)
PY
}
|
||||||
|
|
||||||
|
# Succeed only when all of the following hold, checked in this order:
#   1. ROS_SERVICE is active,
#   2. ROBOT_RECEIVER_LOCAL_SOCKET_PATH is a socket,
#   3. ROS_STATUS_FILE was modified within the last $1 seconds,
#   4. the heartbeat recorded in ROS_STATUS_FILE is within the last $1 seconds.
# The first failing check's status is returned.
ros_receiver_healthy() {
  local age_limit_sec="$1"

  service_is_active "${ROS_SERVICE}" || return
  [[ -S "${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}" ]] || return
  status_file_fresh "${ROS_STATUS_FILE}" "${age_limit_sec}" || return
  ros_receiver_status_fresh "${ROS_STATUS_FILE}" "${age_limit_sec}"
}
|
||||||
|
|
||||||
|
write_watchdog_status() {
|
||||||
|
local fault_reason="$1"
|
||||||
|
local recovery_state="$2"
|
||||||
|
local network_ok="$3"
|
||||||
|
local camera_ok="$4"
|
||||||
|
local ros_ok="$5"
|
||||||
|
local bside_ok="$6"
|
||||||
|
local gps_ok="$7"
|
||||||
|
local gps_device_present="$8"
|
||||||
|
local tmp_file
|
||||||
|
|
||||||
|
tmp_file="${WATCHDOG_STATUS_FILE}.tmp.$$"
|
||||||
|
cat > "${tmp_file}" <<EOF
|
||||||
|
{
|
||||||
|
"updated_at_epoch_ms": $(now_epoch_ms),
|
||||||
|
"fault_reason": "${fault_reason}",
|
||||||
|
"recovery_state": "${recovery_state}",
|
||||||
|
"network_ok": ${network_ok},
|
||||||
|
"camera_ok": ${camera_ok},
|
||||||
|
"ros_ok": ${ros_ok},
|
||||||
|
"bside_ok": ${bside_ok},
|
||||||
|
"gps_ok": ${gps_ok},
|
||||||
|
"gps_device_present": ${gps_device_present},
|
||||||
|
"network_fail_count": ${NETWORK_FAIL_COUNT},
|
||||||
|
"targeted_restart_count": $(targeted_restart_total),
|
||||||
|
"full_restart_count": ${FULL_RESTART_WINDOW_COUNT},
|
||||||
|
"last_action": "${LAST_ACTION}",
|
||||||
|
"last_action_epoch_ms": ${LAST_ACTION_EPOCH_MS}
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
mv -f "${tmp_file}" "${WATCHDOG_STATUS_FILE}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Print one watchdog record as a compact JSON line on stdout.
#
# Arguments:
#   $1  record type          $2  action
#   $3  fault reason         $4  recovery state
#   $5  detail text          $6  incident id (optional; emitted as null when empty)
#   $7..$12  network / camera / ros / bside / gps / gps-device flags
#            (optional, default "1"; only the exact string "1" maps to true)
#
# The record also carries LAST_ACTION, LAST_ACTION_EPOCH_MS,
# NETWORK_FAIL_COUNT, FULL_RESTART_WINDOW_COUNT and the output of
# targeted_restart_total.
watchdog_emit_json() {
  local -a emit_argv=(
    "$1" "$2" "$3" "$4" "$5"
    "${6:-}"
    "${7:-1}" "${8:-1}" "${9:-1}" "${10:-1}" "${11:-1}" "${12:-1}"
    "${LAST_ACTION}" "${LAST_ACTION_EPOCH_MS}" "${NETWORK_FAIL_COUNT}"
    "$(targeted_restart_total)" "${FULL_RESTART_WINDOW_COUNT}"
  )

  python3 - "${emit_argv[@]}" <<'PY'
import json
import sys
import time

(
    record_type,
    action,
    fault_reason,
    recovery_state,
    detail,
    incident_id,
    network_flag,
    camera_flag,
    ros_flag,
    bside_flag,
    gps_flag,
    gps_device_flag,
    last_action,
    last_action_ms,
    network_fails,
    targeted_restarts,
    full_restarts,
) = sys.argv[1:18]


def is_set(flag):
    """Only the literal string "1" counts as true."""
    return flag == "1"


record = {
    "ts_unix_ms": time.time_ns() // 1_000_000,
    "record_type": record_type,
    "action": action,
    "fault_reason": fault_reason,
    "recovery_state": recovery_state,
    "detail": detail,
    "incident_id": incident_id or None,
    "network_ok": is_set(network_flag),
    "camera_ok": is_set(camera_flag),
    "ros_ok": is_set(ros_flag),
    "bside_ok": is_set(bside_flag),
    "gps_ok": is_set(gps_flag),
    "gps_device_present": is_set(gps_device_flag),
    "network_fail_count": int(network_fails),
    "targeted_restart_count": int(targeted_restarts),
    "full_restart_count": int(full_restarts),
    "last_action": last_action,
    "last_action_epoch_ms": int(last_action_ms or 0),
}
print(json.dumps(record, separators=(",", ":"), ensure_ascii=False))
PY
}
|
||||||
|
|
||||||
|
# Append one JSON record (built by watchdog_emit_json from "$@") to
# WATCHDOG_EVENT_LOG. Does nothing when WATCHDOG_EVENT_LOG is empty.
#
# Always returns 0: failures are reported through blitz_log at most once per
# failure streak, tracked by WATCHDOG_EVENT_LOG_FAILURE_REPORTED, which is
# reset after the next successful append.
watchdog_append_event() {
  local line=""
  local failure_detail=""

  if [[ -z "${WATCHDOG_EVENT_LOG}" ]]; then
    return 0
  fi

  if line="$(watchdog_emit_json "$@" 2>&1)"; then
    if blitz_jsonl_append_line "${WATCHDOG_EVENT_LOG}" "${line}"; then
      WATCHDOG_EVENT_LOG_FAILURE_REPORTED=0
      return 0
    fi
    failure_detail="append-failed"
  else
    # stderr was merged into the capture, so it carries the emitter's error text.
    failure_detail="${line}"
  fi

  if (( WATCHDOG_EVENT_LOG_FAILURE_REPORTED == 0 )); then
    blitz_log "${STEP}" "watchdog-event-log" "failure" "path=${WATCHDOG_EVENT_LOG} detail=${failure_detail}" 0 || true
    WATCHDOG_EVENT_LOG_FAILURE_REPORTED=1
  fi
  return 0
}
|
||||||
|
|
||||||
|
# Append one JSON record (built by watchdog_emit_json from "$@") to
# WATCHDOG_SAMPLE_LOG. Does nothing when WATCHDOG_SAMPLE_LOG is empty.
#
# Always returns 0: failures are reported through blitz_log at most once per
# failure streak, tracked by WATCHDOG_SAMPLE_LOG_FAILURE_REPORTED, which is
# reset after the next successful append.
watchdog_append_sample() {
  local line=""
  local failure_detail=""

  if [[ -z "${WATCHDOG_SAMPLE_LOG}" ]]; then
    return 0
  fi

  if line="$(watchdog_emit_json "$@" 2>&1)"; then
    if blitz_jsonl_append_line "${WATCHDOG_SAMPLE_LOG}" "${line}"; then
      WATCHDOG_SAMPLE_LOG_FAILURE_REPORTED=0
      return 0
    fi
    failure_detail="append-failed"
  else
    # stderr was merged into the capture, so it carries the emitter's error text.
    failure_detail="${line}"
  fi

  if (( WATCHDOG_SAMPLE_LOG_FAILURE_REPORTED == 0 )); then
    blitz_log "${STEP}" "watchdog-sample-log" "failure" "path=${WATCHDOG_SAMPLE_LOG} detail=${failure_detail}" 0 || true
    WATCHDOG_SAMPLE_LOG_FAILURE_REPORTED=1
  fi
  return 0
}
|
||||||
|
|
||||||
|
# Emit a "state-transition" event when the (fault_reason, recovery_state)
# pair differs from the last pair reported, then remember the new pair in
# LAST_REPORTED_FAULT_REASON / LAST_REPORTED_RECOVERY_STATE.
#
# Arguments:
#   $1  fault_reason
#   $2  recovery_state
watchdog_record_state_transition() {
  local fault_reason="$1"
  local recovery_state="$2"

  if [[ "${fault_reason}" != "${LAST_REPORTED_FAULT_REASON}" || "${recovery_state}" != "${LAST_REPORTED_RECOVERY_STATE}" ]]; then
    watchdog_append_event "event" "state-transition" "${fault_reason}" "${recovery_state}" "" ""
    LAST_REPORTED_FAULT_REASON="${fault_reason}"
    LAST_REPORTED_RECOVERY_STATE="${recovery_state}"
  fi
}
|
||||||
|
|
||||||
|
# Start an incident capture attributed to the watchdog.
#
# Arguments:
#   $1  reason     passed as --reason
#   $2  unit_name  passed as --unit
# stdout of blitz_launch_incident_capture is passed through (callers capture
# it as the incident id); its stderr is discarded and its failure is ignored,
# so this function always returns 0.
watchdog_launch_incident() {
  local reason="$1"
  local unit_name="$2"
  local -a capture_args=(
    --source watchdog
    --reason "${reason}"
    --unit "${unit_name}"
    --result failure
    --exit-status 1
  )

  blitz_launch_incident_capture "${capture_args[@]}" 2>/dev/null || true
}
|
||||||
|
|
||||||
|
# Record the most recent recovery action and when it happened.
#
# Arguments:
#   $1  action name, stored in LAST_ACTION
# LAST_ACTION_EPOCH_MS is set from now_epoch_ms.
set_last_action() {
  local action_name="$1"

  LAST_ACTION="${action_name}"
  LAST_ACTION_EPOCH_MS="$(now_epoch_ms)"
}
|
||||||
|
|
||||||
|
# Print the sum of all per-fault counters stored in the
# TARGETED_RESTART_WINDOW_COUNT associative array (0 when it is empty).
targeted_restart_total() {
  local total=0
  local window_count

  for window_count in "${TARGETED_RESTART_WINDOW_COUNT[@]}"; do
    total=$(( total + window_count ))
  done
  printf '%s\n' "${total}"
}
|
||||||
|
|
||||||
|
# Count a targeted restart for one fault key inside a 60-second window.
#
# Arguments:
#   $1  fault_key  index into TARGETED_RESTART_WINDOW_START / _COUNT
# A new window (count = 1) starts when none exists for the key or the current
# one began more than 60 seconds ago; otherwise the count is incremented.
# Returns 0 when the count within the window has reached 2 or more,
# 1 otherwise.
register_targeted_restart() {
  local fault_key="$1"
  local now_sec
  local window_start
  local count

  now_sec="$(now_epoch_sec)"
  window_start="${TARGETED_RESTART_WINDOW_START["${fault_key}"]:-0}"
  count="${TARGETED_RESTART_WINDOW_COUNT["${fault_key}"]:-0}"

  if (( window_start != 0 && now_sec - window_start <= 60 )); then
    count=$(( count + 1 ))
  else
    window_start="${now_sec}"
    count=1
  fi

  TARGETED_RESTART_WINDOW_START["${fault_key}"]="${window_start}"
  TARGETED_RESTART_WINDOW_COUNT["${fault_key}"]="${count}"

  if (( count >= 2 )); then
    return 0
  fi
  return 1
}
|
||||||
|
|
||||||
|
# Count a full-stack restart inside a 600-second window and enter backoff
# once the window holds 3 or more.
#
# Updates FULL_RESTART_WINDOW_START / FULL_RESTART_WINDOW_COUNT. On reaching
# the limit, sets BACKOFF_UNTIL to 60 seconds from now and emits a
# "backoff-enter" event.
record_full_restart() {
  local now_sec

  now_sec="$(now_epoch_sec)"

  if (( FULL_RESTART_WINDOW_START != 0 && now_sec - FULL_RESTART_WINDOW_START <= 600 )); then
    FULL_RESTART_WINDOW_COUNT=$(( FULL_RESTART_WINDOW_COUNT + 1 ))
  else
    # No window yet, or the previous one expired: start a fresh one.
    FULL_RESTART_WINDOW_START="${now_sec}"
    FULL_RESTART_WINDOW_COUNT=1
  fi

  if (( FULL_RESTART_WINDOW_COUNT < 3 )); then
    return 0
  fi

  BACKOFF_UNTIL=$(( now_sec + 60 ))
  watchdog_append_event "event" "backoff-enter" "backoff" "backoff" "full_restart_count=${FULL_RESTART_WINDOW_COUNT}" ""
}
|
||||||
|
|
||||||
|
# Restart only the B-side service for a given fault, escalating to a full
# stack restart when the same fault keeps recurring.
#
# Arguments:
#   $1  fault_key  key handed to register_targeted_restart
#   $2  reason     reason recorded in logs and events
# Returns:
#   0 when the fault was escalated to full_restart_stack (its own result is
#     not propagated) or when the service restart succeeded;
#   the exit status of `systemctl restart` when the restart failed.
restart_bside_targeted() {
  local fault_key="$1"
  local reason="$2"
  local rc=0
  local incident_id=""

  if register_targeted_restart "${fault_key}"; then
    blitz_log "${STEP}" "escalate-full-restart" "start" "reason=${reason}" 0
    watchdog_append_event "event" "escalate-full-restart" "${reason}-escalated" "recovering" "fault_key=${fault_key}" ""
    full_restart_stack "${reason}-escalated"
    return 0
  fi

  incident_id="$(watchdog_launch_incident "${reason}" "${B_SIDE_SERVICE}")"
  set_last_action "restart-bside"
  RECOVERY_ACTION_TAKEN=1
  blitz_log "${STEP}" "restart-bside" "start" "reason=${reason}" 0
  watchdog_append_event "event" "restart-bside-start" "${reason}" "recovering" "fault_key=${fault_key}" "${incident_id}"

  # Capture the status on the failing command itself: after an `if ...; fi`
  # without an else branch, $? is 0 and the real failure code would be lost.
  systemctl restart "${B_SIDE_SERVICE}" || rc=$?

  if (( rc == 0 )); then
    blitz_log "${STEP}" "restart-bside" "success" "reason=${reason}" 0
    watchdog_append_event "event" "restart-bside-success" "${reason}" "recovering" "fault_key=${fault_key}" "${incident_id}"
    return 0
  fi

  blitz_log "${STEP}" "restart-bside" "failure" "reason=${reason}" "${rc}"
  watchdog_append_event "event" "restart-bside-failure" "${reason}" "recovering" "fault_key=${fault_key} rc=${rc}" "${incident_id}"
  return "${rc}"
}
|
||||||
|
|
||||||
|
# Restart the whole stack: stop the B-side service, restart the ROS service,
# wait for the ROS unix socket, then start the B-side service again.
#
# Arguments:
#   $1  reason  recorded in logs and events
# Side effects:
#   - sets RECOVERY_ACTION_TAKEN=1 and overwrites the caller-visible
#     recovery_state / fault_reason variables (they are not local here);
#   - calls record_full_restart on every exit path;
#   - every "full-restart-start" event is followed by exactly one
#     "full-restart-success" or "full-restart-failure" event, the latter
#     tagged with the failing stage.
# Returns 0 on success, otherwise the exit status of the failing stage.
full_restart_stack() {
  local reason="$1"
  local rc
  local incident_id=""

  incident_id="$(watchdog_launch_incident "${reason}" "blitz-robot.target")"
  set_last_action "full-restart"
  RECOVERY_ACTION_TAKEN=1
  recovery_state="recovering"
  fault_reason="${reason}"

  blitz_log "${STEP}" "full-restart-stop-bside" "start" "reason=${reason}" 0
  watchdog_append_event "event" "full-restart-start" "${reason}" "recovering" "" "${incident_id}"
  # Stopping is best-effort; the service may already be down.
  systemctl stop "${B_SIDE_SERVICE}" || true

  if systemctl restart "${ROS_SERVICE}"; then
    blitz_log "${STEP}" "full-restart-restart-ros" "success" "reason=${reason}" 0
  else
    rc=$?
    blitz_log "${STEP}" "full-restart-restart-ros" "failure" "reason=${reason}" "${rc}"
    watchdog_append_event "event" "full-restart-failure" "${reason}" "recovering" "stage=restart-ros rc=${rc}" "${incident_id}"
    record_full_restart
    return "${rc}"
  fi

  if bash "${BOOT_SCRIPT_DIR}/wait-for-unix-socket.sh" --step "${STEP}" --timeout "${BLITZ_ROS_SOCKET_WAIT_SEC}"; then
    :
  else
    rc=$?
    blitz_log "${STEP}" "full-restart-wait-socket" "failure" "reason=${reason}" "${rc}"
    watchdog_append_event "event" "full-restart-failure" "${reason}" "recovering" "stage=wait-socket rc=${rc}" "${incident_id}"
    record_full_restart
    return "${rc}"
  fi

  if systemctl start "${B_SIDE_SERVICE}"; then
    blitz_log "${STEP}" "full-restart-start-bside" "success" "reason=${reason}" 0
  else
    rc=$?
    blitz_log "${STEP}" "full-restart-start-bside" "failure" "reason=${reason}" "${rc}"
    watchdog_append_event "event" "full-restart-failure" "${reason}" "recovering" "stage=start-bside rc=${rc}" "${incident_id}"
    record_full_restart
    return "${rc}"
  fi

  watchdog_append_event "event" "full-restart-success" "${reason}" "recovering" "" "${incident_id}"
  record_full_restart
}
|
||||||
|
|
||||||
|
# Succeed only when fault injection is allowed
# (BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION is "1") and the marker file named by
# NETWORK_FAULT_FILE exists as a regular file.
network_fault_injected() {
  if [[ "${BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION}" != "1" ]]; then
    return 1
  fi
  [[ -f "${NETWORK_FAULT_FILE}" ]]
}
|
||||||
|
|
||||||
|
# Refresh NETWORK_LAST_INTERFACE from blitz_resolve_5g_interface.
#
# On success (non-empty result) the name is also remembered in
# NETWORK_ROUTE_INTERFACE_LAST_KNOWN and 0 is returned. Otherwise
# NETWORK_LAST_INTERFACE is left empty, the last-known value is untouched,
# and 1 is returned.
resolve_network_interface() {
  NETWORK_LAST_INTERFACE="$(blitz_resolve_5g_interface || true)"

  [[ -n "${NETWORK_LAST_INTERFACE}" ]] || return 1

  NETWORK_ROUTE_INTERFACE_LAST_KNOWN="${NETWORK_LAST_INTERFACE}"
  return 0
}
|
||||||
|
|
||||||
|
# Print, one per line, the hosts whose routes the watchdog manages.
#
# BLITZ_TIME_SERVER_IP (when set) is printed first, followed by every entry
# of BLITZ_5G_ROUTE_TARGETS that is non-empty and differs from it. Entries
# may be separated by commas and/or whitespace.
#
# The list is split with `read -a` rather than an unquoted expansion so that
# entries are never subject to pathname expansion, and an unset
# BLITZ_5G_ROUTE_TARGETS is treated as an empty list instead of aborting
# under `set -u`.
network_route_targets() {
  local target
  local -a route_targets=()

  if [[ -n "${BLITZ_TIME_SERVER_IP:-}" ]]; then
    printf '%s\n' "${BLITZ_TIME_SERVER_IP}"
  fi

  # read returns non-zero at end of input with -d ''; the array is still filled.
  IFS=$', \t\n' read -r -d '' -a route_targets <<< "${BLITZ_5G_ROUTE_TARGETS:-}" || true

  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older bash.
  for target in ${route_targets[@]+"${route_targets[@]}"}; do
    if [[ -n "${target}" && "${target}" != "${BLITZ_TIME_SERVER_IP:-}" ]]; then
      printf '%s\n' "${target}"
    fi
  done
}
|
||||||
|
|
||||||
|
# Log the first line of `ip route get` for every managed route target.
#
# Arguments:
#   $1  action  label included in each log entry
# Targets come from network_route_targets; when the lookup prints nothing the
# route is logged as "unresolved".
log_target_route_paths() {
  local action="$1"
  local target
  local route_output

  while IFS= read -r target; do
    if [[ -z "${target}" ]]; then
      continue
    fi
    route_output="$(ip route get "${target}" 2>&1 | head -n 1 || true)"
    blitz_log "${STEP}" "route-path" "info" "action=${action} target=${target} route=${route_output:-unresolved}" 0
  done < <(network_route_targets)
}
|
||||||
|
|
||||||
|
# Succeed when a route line names the given interface as its device.
#
# Arguments:
#   $1  route_output    one line of `ip route` output
#   $2  interface_name  device to look for; an empty name never matches
# The " dev NAME" clause must be followed by a space or end the line, so
# "wwan0" does not match "wwan01".
route_output_uses_interface() {
  local route_output="$1"
  local interface_name="$2"
  local dev_clause

  if [[ -z "${interface_name}" ]]; then
    return 1
  fi

  dev_clause=" dev ${interface_name}"
  case "${route_output}" in
    *"${dev_clause} "* | *"${dev_clause}")
      return 0
      ;;
  esac
  return 1
}
|
||||||
|
|
||||||
|
# Succeed when a route line goes through the given gateway.
#
# Arguments:
#   $1  route_output  one line of `ip route` output
#   $2  gateway       next-hop address to look for; empty never matches
# The "via ADDR" clause must be followed by a space or end the line. A plain
# substring test would let gateway 10.0.0.1 match "via 10.0.0.10" and
# misclassify routes through a different next hop.
route_output_uses_gateway() {
  local route_output="$1"
  local gateway="$2"

  [[ -n "${gateway}" ]] || return 1
  [[ "${route_output}" == *"via ${gateway} "* || "${route_output}" == *"via ${gateway}" ]]
}
|
||||||
|
|
||||||
|
# Succeed when a route line uses BOTH the given interface and the given
# gateway.
#
# Arguments:
#   $1  route_output    one line of `ip route` output
#   $2  interface_name  expected device
#   $3  gateway         expected next hop
route_is_desired_target_route() {
  local route_output="$1"
  local interface_name="$2"
  local gateway="$3"

  # Propagate the interface check's own status when it fails.
  route_output_uses_interface "${route_output}" "${interface_name}" || return
  route_output_uses_gateway "${route_output}" "${gateway}"
}
|
||||||
|
|
||||||
|
# Succeed when a route line matches ANY of: the given interface, the given
# gateway, or the configured BLITZ_5G_GATEWAY.
#
# Arguments:
#   $1  route_output    one line of `ip route` output
#   $2  interface_name  optional device name
#   $3  gateway         optional next hop
route_is_managed_5g_route() {
  local route_output="$1"
  local interface_name="${2:-}"
  local gateway="${3:-}"
  local candidate_gateway

  if route_output_uses_interface "${route_output}" "${interface_name}"; then
    return 0
  fi

  for candidate_gateway in "${gateway}" "${BLITZ_5G_GATEWAY:-}"; do
    if route_output_uses_gateway "${route_output}" "${candidate_gateway}"; then
      return 0
    fi
  done

  return 1
}
|
||||||
|
|
||||||
|
# Print the interface whose target routes should be cleaned up.
#
# Preference order: NETWORK_LAST_INTERFACE, then
# NETWORK_ROUTE_INTERFACE_LAST_KNOWN, then the interface returned by
# blitz_read_5g_info_interface for BLITZ_5G_INFO_JSON.
# Returns 1 (printing nothing) when none of them yields a name.
resolve_route_cleanup_interface() {
  local interface_name=""
  local info_json="${BLITZ_5G_INFO_JSON:-}"

  if [[ -n "${NETWORK_LAST_INTERFACE}" ]]; then
    interface_name="${NETWORK_LAST_INTERFACE}"
  elif [[ -n "${NETWORK_ROUTE_INTERFACE_LAST_KNOWN}" ]]; then
    interface_name="${NETWORK_ROUTE_INTERFACE_LAST_KNOWN}"
  else
    interface_name="$(blitz_read_5g_info_interface "${info_json}" || true)"
  fi

  if [[ -z "${interface_name}" ]]; then
    return 1
  fi

  printf '%s\n' "${interface_name}"
  return 0
}
|
||||||
|
|
||||||
|
# Print the gateway to use for an interface.
#
# Arguments:
#   $1  interface_name
# The value is the token following "via" in the first default route that
# `ip -o route show default dev <interface>` reports; when there is none,
# BLITZ_5G_GATEWAY is used instead. Returns 1 (printing nothing) when neither
# source provides a gateway.
resolve_network_gateway() {
  local interface_name="$1"
  local default_route
  local gateway=""
  local -a tokens=()
  local index=0

  default_route="$(ip -o route show default dev "${interface_name}" 2>/dev/null | head -n 1 || true)"

  if [[ -n "${default_route}" ]]; then
    read -r -a tokens <<< "${default_route}"
    # Stop one short of the end: a trailing "via" has no address after it.
    while (( index < ${#tokens[@]} - 1 )); do
      if [[ "${tokens[index]}" == "via" ]]; then
        gateway="${tokens[index + 1]}"
        break
      fi
      index=$(( index + 1 ))
    done
  fi

  gateway="${gateway:-${BLITZ_5G_GATEWAY:-}}"
  if [[ -z "${gateway}" ]]; then
    return 1
  fi

  printf '%s\n' "${gateway}"
  return 0
}
|
||||||
|
|
||||||
|
# Ensure every managed target has a /32 route via the given interface and
# gateway.
#
# Arguments:
#   $1  interface_name  required; an empty value returns 1 immediately
#   $2  gateway         optional; resolved with resolve_network_gateway when
#                       omitted, and 1 is returned if that yields nothing
# Targets whose existing /32 route already uses this interface and gateway
# are left alone; the rest are installed with `ip route replace`. The first
# failing replace aborts the loop and its exit status is returned. When at
# least one route changed, NETWORK_ROUTE_INTERFACE_LAST_KNOWN is updated and
# the resulting paths are logged.
sync_target_routes_to_5g() {
  local interface_name="$1"
  local gateway="${2:-}"
  local route_output=""
  local updated=0
  local target
  local rc

  [[ -n "${interface_name}" ]] || return 1

  if [[ -z "${gateway}" ]]; then
    gateway="$(resolve_network_gateway "${interface_name}" || true)"
    if [[ -z "${gateway}" ]]; then
      blitz_log "${STEP}" "route-sync-gateway" "failure" "interface=${interface_name}" 1
      return 1
    fi
  fi

  while IFS= read -r target; do
    if [[ -z "${target}" ]]; then
      continue
    fi

    route_output="$(ip route show "${target}/32" 2>/dev/null | head -n 1 || true)"
    if [[ -n "${route_output}" ]] && route_is_desired_target_route "${route_output}" "${interface_name}" "${gateway}"; then
      continue
    fi

    rc=0
    ip route replace "${target}/32" via "${gateway}" dev "${interface_name}" || rc=$?
    if (( rc != 0 )); then
      blitz_log "${STEP}" "route-sync-target" "failure" "target=${target} interface=${interface_name} gateway=${gateway}" "${rc}"
      return "${rc}"
    fi

    updated=1
    blitz_log "${STEP}" "route-sync-target" "success" "target=${target} interface=${interface_name} gateway=${gateway}" 0
  done < <(network_route_targets)

  if (( updated == 1 )); then
    NETWORK_ROUTE_INTERFACE_LAST_KNOWN="${interface_name}"
    log_target_route_paths "sync-to-5g"
  fi

  return 0
}
|
||||||
|
|
||||||
|
# Delete the /32 routes of managed targets that currently go through the 5G
# interface or gateway.
#
# Arguments:
#   $1  interface_name  optional; resolved with
#                       resolve_route_cleanup_interface when omitted
#   $2  gateway         optional; resolved from the interface when possible,
#                       then from BLITZ_5G_GATEWAY
# Only routes accepted by route_is_managed_5g_route are deleted. The first
# failing `ip route del` aborts the loop and its exit status is returned;
# otherwise the function returns 0, logging the resulting paths when
# anything was removed.
clear_target_routes_from_5g() {
  local interface_name="${1:-}"
  local gateway="${2:-}"
  local route_output=""
  local target
  local removed_any=0
  local rc

  if [[ -z "${interface_name}" ]]; then
    interface_name="$(resolve_route_cleanup_interface || true)"
  fi
  if [[ -z "${gateway}" && -n "${interface_name}" ]]; then
    gateway="$(resolve_network_gateway "${interface_name}" || true)"
  fi
  if [[ -z "${gateway}" ]]; then
    gateway="${BLITZ_5G_GATEWAY:-}"
  fi

  while IFS= read -r target; do
    if [[ -z "${target}" ]]; then
      continue
    fi

    route_output="$(ip route show "${target}/32" 2>/dev/null | head -n 1 || true)"
    [[ -n "${route_output}" ]] || continue
    route_is_managed_5g_route "${route_output}" "${interface_name}" "${gateway}" || continue

    rc=0
    ip route del "${target}/32" || rc=$?
    if (( rc != 0 )); then
      blitz_log "${STEP}" "route-clear-target" "failure" "target=${target} interface=${interface_name:-unknown} gateway=${gateway:-unknown}" "${rc}"
      return "${rc}"
    fi

    removed_any=1
    blitz_log "${STEP}" "route-clear-target" "success" "target=${target} interface=${interface_name:-unknown} gateway=${gateway:-unknown}" 0
  done < <(network_route_targets)

  if (( removed_any == 1 )); then
    blitz_log "${STEP}" "route-clear" "success" "interface=${interface_name:-unknown} gateway=${gateway:-unknown}" 0
    log_target_route_paths "clear-from-5g"
  fi

  return 0
}
|
||||||
|
|
||||||
|
# Try to restore connectivity over the given interface by re-installing the
# managed target routes and verifying the result.
#
# Arguments:
#   $1  interface_name  required; an empty value returns 1 immediately
# Steps: resolve the gateway, sync target routes, confirm via
# blitz_route_ready that BLITZ_TIME_SERVER_IP is routed over the interface,
# then ping it once through the interface. If any step after gateway
# resolution fails, the managed routes are cleared again (best effort) and 1
# is returned. Returns 0 only when every check passes.
repair_network_routes() {
  local interface_name="$1"
  local gateway=""
  local route_output

  [[ -n "${interface_name}" ]] || return 1

  gateway="$(resolve_network_gateway "${interface_name}" || true)"
  if [[ -z "${gateway}" ]]; then
    blitz_log "${STEP}" "route-repair-gateway" "failure" "interface=${interface_name}" 1
    return 1
  fi

  sync_target_routes_to_5g "${interface_name}" "${gateway}" || {
    clear_target_routes_from_5g "${interface_name}" "${gateway}" || true
    return 1
  }

  route_output="$(blitz_route_ready "${BLITZ_TIME_SERVER_IP}" "${interface_name}" || true)"
  if [[ -z "${route_output}" ]]; then
    clear_target_routes_from_5g "${interface_name}" "${gateway}" || true
    blitz_log "${STEP}" "route-repair-postcheck" "failure" "interface=${interface_name} gateway=${gateway}" 1
    return 1
  fi

  ping -I "${interface_name}" -c 1 -W 2 "${BLITZ_TIME_SERVER_IP}" >/dev/null 2>&1 || {
    clear_target_routes_from_5g "${interface_name}" "${gateway}" || true
    blitz_log "${STEP}" "route-repair-probe" "failure" "interface=${interface_name} target=${BLITZ_TIME_SERVER_IP}" 1
    return 1
  }

  blitz_log "${STEP}" "route-repair-postcheck" "success" "interface=${interface_name} gateway=${gateway} route=${route_output}" 0
  return 0
}
|
||||||
|
|
||||||
|
# Check the primary (5G) network path.
#
# Clears NETWORK_LAST_INTERFACE and then fails (returns 1) when a network
# fault is injected, when the interface cannot be resolved, or when
# blitz_route_ready prints nothing for BLITZ_TIME_SERVER_IP over that
# interface. Otherwise the result is the exit status of a single ping to
# BLITZ_TIME_SERVER_IP bound to the interface.
network_is_healthy() {
  local route_output

  NETWORK_LAST_INTERFACE=""

  if network_fault_injected || ! resolve_network_interface; then
    return 1
  fi

  route_output="$(blitz_route_ready "${BLITZ_TIME_SERVER_IP}" "${NETWORK_LAST_INTERFACE}" || true)"
  [[ -n "${route_output}" ]] || return 1

  ping -I "${NETWORK_LAST_INTERFACE}" -c 1 -W 2 "${BLITZ_TIME_SERVER_IP}" >/dev/null 2>&1
}
|
||||||
|
|
||||||
|
# Check reachability of BLITZ_TIME_SERVER_IP without pinning an interface.
#
# Fails (returns 1) when BLITZ_TIME_SERVER_IP is empty or unset, or when
# blitz_route_ready prints nothing for it. Otherwise the result is the exit
# status of a single unbound ping.
fallback_network_is_healthy() {
  local route_output

  [[ -n "${BLITZ_TIME_SERVER_IP:-}" ]] || return 1

  route_output="$(blitz_route_ready "${BLITZ_TIME_SERVER_IP}" || true)"
  [[ -n "${route_output}" ]] || return 1

  ping -c 1 -W 2 "${BLITZ_TIME_SERVER_IP}" >/dev/null 2>&1
}
|
||||||
|
|
||||||
|
# Poll network_is_healthy until it succeeds or the attempt budget runs out.
#
# Arguments:
#   $1  timeout_sec  number of polling rounds; each round sleeps 1 second
#                    after a failed check
# A "waiting" entry is logged on round 0 and on every fifth round after it.
# Returns 0 as soon as the network is healthy, 1 after the last round.
wait_for_network_recovery() {
  local timeout_sec="$1"
  local waited

  for (( waited = 0; waited < timeout_sec; waited++ )); do
    if network_is_healthy; then
      blitz_log "${STEP}" "network-postcheck" "success" "interface=${NETWORK_LAST_INTERFACE} waited_sec=${waited}" 0
      return 0
    fi

    if (( waited % 5 == 0 )); then
      blitz_log "${STEP}" "network-postcheck" "waiting" "interface=${NETWORK_LAST_INTERFACE:-unresolved} waited_sec=${waited}" 0
    fi

    sleep 1
  done

  blitz_log "${STEP}" "network-postcheck" "failure" "interface=${NETWORK_LAST_INTERFACE:-unresolved} timeout_sec=${timeout_sec}" 1
  return 1
}
|
||||||
|
|
||||||
|
# Recover the primary network, cheapest option first.
#
# 1. Route repair: if the interface resolves and repair_network_routes
#    succeeds, start the recovery cooldown, reset NETWORK_FAIL_COUNT and
#    return 0.
# 2. Redial: otherwise open an incident, stop the B-side service, run
#    5g-dial.sh and wait for the network to come back. A failure in either
#    step is logged, recorded as an event and returned as the exit status.
# 3. After a successful redial, start the cooldown, reset the fail count and
#    bring services back: a targeted B-side restart when the ROS receiver is
#    healthy, a full stack restart otherwise. Their results are not
#    propagated; the function returns 0.
perform_network_recovery() {
  local rc=0
  local incident_id=""

  if resolve_network_interface && repair_network_routes "${NETWORK_LAST_INTERFACE}"; then
    set_last_action "route-repair"
    RECOVERY_ACTION_TAKEN=1
    NETWORK_COOLDOWN_UNTIL=$(( $(now_epoch_sec) + BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC ))
    NETWORK_FAIL_COUNT=0
    blitz_log "${STEP}" "network-recovery" "success" "mode=route-repair interface=${NETWORK_LAST_INTERFACE}" 0
    watchdog_append_event "event" "route-repair-success" "network_or_robot_unreachable" "recovering" "interface=${NETWORK_LAST_INTERFACE}" ""
    return 0
  fi

  incident_id="$(watchdog_launch_incident "network-recovery" "blitz-5g-dial.service")"
  set_last_action "network-recovery"
  RECOVERY_ACTION_TAKEN=1
  blitz_log "${STEP}" "network-recovery" "start" "fail_count=${NETWORK_FAIL_COUNT}" 0
  watchdog_append_event "event" "network-recovery-start" "network_or_robot_unreachable" "recovering" "fail_count=${NETWORK_FAIL_COUNT}" "${incident_id}"
  systemctl stop "${B_SIDE_SERVICE}" || true

  bash "${BOOT_SCRIPT_DIR}/5g-dial.sh" || rc=$?
  if (( rc != 0 )); then
    blitz_log "${STEP}" "network-redial" "failure" "fail_count=${NETWORK_FAIL_COUNT} script=${BOOT_SCRIPT_DIR}/5g-dial.sh" "${rc}"
    watchdog_append_event "event" "network-recovery-failure" "network_or_robot_unreachable" "recovering" "stage=redial rc=${rc}" "${incident_id}"
    return "${rc}"
  fi

  wait_for_network_recovery "${BLITZ_5G_ROUTE_WAIT_SEC}" || rc=$?
  if (( rc != 0 )); then
    blitz_log "${STEP}" "network-recovery" "failure" "fail_count=${NETWORK_FAIL_COUNT} interface=${NETWORK_LAST_INTERFACE:-unresolved}" "${rc}"
    watchdog_append_event "event" "network-recovery-failure" "network_or_robot_unreachable" "recovering" "stage=postcheck rc=${rc}" "${incident_id}"
    return "${rc}"
  fi

  NETWORK_COOLDOWN_UNTIL=$(( $(now_epoch_sec) + BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC ))
  NETWORK_FAIL_COUNT=0
  watchdog_append_event "event" "network-recovery-success" "network_or_robot_unreachable" "recovering" "interface=${NETWORK_LAST_INTERFACE:-unresolved}" "${incident_id}"

  if ros_receiver_healthy "${BLITZ_HEALTH_STALE_SEC}"; then
    restart_bside_targeted "network" "network-recovered"
  else
    full_restart_stack "network-recovered-ros-unhealthy"
  fi
  return 0
}
|
||||||
|
|
||||||
|
# Watchdog start-up: load the boot environment, verify privileges and
# required tools, prepare the runtime directory and run context, then derive
# the file locations used by the main loop.
blitz_load_boot_env
blitz_require_root "${STEP}"
for required_command in systemctl stat ping python3; do
  blitz_require_command "${required_command}" "${STEP}"
done
unset required_command
blitz_prepare_runtime_dir
blitz_require_run_context

# Status files and the fault-injection marker live under BLITZ_RUNTIME_DIR.
B_SIDE_STATUS_FILE="${BLITZ_RUNTIME_DIR}/b-side-omnid.status.json"
ROS_STATUS_FILE="${BLITZ_RUNTIME_DIR}/ros-receiver.status.json"
WATCHDOG_STATUS_FILE="${BLITZ_RUNTIME_DIR}/watchdog.status.json"
NETWORK_FAULT_FILE="${BLITZ_RUNTIME_DIR}/fault-injection-network-down"

# JSONL logs are written under BLITZ_RUN_DIR.
WATCHDOG_EVENT_LOG="${BLITZ_RUN_DIR}/watchdog-events.jsonl"
WATCHDOG_SAMPLE_LOG="${BLITZ_RUN_DIR}/watchdog-samples.jsonl"
|
||||||
|
|
||||||
|
# Watchdog main loop. Each iteration:
#   1. resets the per-iteration health flags;
#   2. while in backoff, only reports state and sleeps;
#   3. checks the network (primary path, then fallback, then recovery);
#   4. checks GPS, camera, B-side and the ROS receiver, in that order;
#   5. records the state transition, status file and sample, then sleeps.
# At most one recovery action is taken per iteration: the B-side and ROS
# checks are skipped once RECOVERY_ACTION_TAKEN is set.
while true; do
  fault_reason="none"
  recovery_state="ok"
  network_ok=1
  camera_ok=1
  ros_ok=1
  bside_ok=1
  gps_ok=1
  gps_device_present=1
  RECOVERY_ACTION_TAKEN=0
  now_sec="$(now_epoch_sec)"

  # Seed the GPS flags from the last known monitor state so that the backoff
  # report below reflects it.
  if gps_monitor_enabled; then
    gps_device_present="${GPS_DEVICE_PRESENT_STATE}"
    if (( GPS_DEVICE_PRESENT_STATE == 0 || GPS_STACK_ACTIVE_STATE == 0 )); then
      gps_ok=0
    fi
  fi

  # --- Backoff: report only, take no recovery action ----------------------
  if (( BACKOFF_UNTIL > now_sec )); then
    fault_reason="backoff"
    recovery_state="backoff"
    watchdog_record_state_transition "${fault_reason}" "${recovery_state}"
    write_watchdog_status "${fault_reason}" "${recovery_state}" 0 0 0 0 "${gps_ok}" "${gps_device_present}"
    watchdog_append_sample "sample" "loop" "${fault_reason}" "${recovery_state}" "" "" 0 0 0 0 "${gps_ok}" "${gps_device_present}"
    sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}"
    continue
  fi

  # --- Network -------------------------------------------------------------
  if (( NETWORK_COOLDOWN_UNTIL > now_sec )); then
    # A recent recovery is still settling; skip the network probe.
    recovery_state="recovering"
  elif network_is_healthy; then
    NETWORK_PRIMARY_LAST_RETRY_SEC=0
    NETWORK_FAIL_COUNT=0
    sync_target_routes_to_5g "${NETWORK_LAST_INTERFACE}" || true
  else
    # Primary path is down: drop the pinned routes, then see whether the
    # default path still reaches the target.
    clear_target_routes_from_5g || true

    if ! fallback_network_is_healthy; then
      network_ok=0
      NETWORK_FAIL_COUNT=$(( NETWORK_FAIL_COUNT + 1 ))
      fault_reason="network_or_robot_unreachable"
      recovery_state="recovering"
      blitz_log "${STEP}" "network-check" "failure" "count=${NETWORK_FAIL_COUNT} interface=${NETWORK_LAST_INTERFACE:-unresolved}" 1
      if (( NETWORK_FAIL_COUNT >= BLITZ_NETWORK_FAIL_THRESHOLD )); then
        perform_network_recovery || true
      fi
    else
      NETWORK_FAIL_COUNT=0
      fault_reason="network_fallback_active"
      recovery_state="degraded"
      blitz_log "${STEP}" "network-check" "fallback" "interface=${NETWORK_LAST_INTERFACE:-unresolved} target=${BLITZ_TIME_SERVER_IP}" 0

      # Retry the primary path at most once every 10 seconds.
      if (( NETWORK_PRIMARY_LAST_RETRY_SEC == 0 || now_sec - NETWORK_PRIMARY_LAST_RETRY_SEC >= 10 )); then
        NETWORK_PRIMARY_LAST_RETRY_SEC="${now_sec}"
        if resolve_network_interface && repair_network_routes "${NETWORK_LAST_INTERFACE}"; then
          NETWORK_PRIMARY_LAST_RETRY_SEC=0
          fault_reason="none"
          recovery_state="ok"
          blitz_log "${STEP}" "network-check" "primary-restored" "interface=${NETWORK_LAST_INTERFACE} target=${BLITZ_TIME_SERVER_IP}" 0
          log_target_route_paths "primary-restored"
        fi
      fi
    fi
  fi

  # --- GPS -----------------------------------------------------------------
  if check_gps_health "${now_sec}"; then
    gps_ok=1
  else
    gps_ok=0
    # A GPS fault is reported only when nothing else has claimed the reason.
    if [[ "${fault_reason}" == "none" ]]; then
      if (( GPS_DEVICE_PRESENT_STATE == 0 )); then
        fault_reason="gps_device_missing"
      else
        fault_reason="gps_reconnect_failed"
      fi
      recovery_state="degraded"
    fi
  fi
  gps_device_present="${GPS_DEVICE_PRESENT_STATE}"

  # --- Camera --------------------------------------------------------------
  if [[ ! -e "${OMNI_CAMERA_DEVICE}" ]]; then
    camera_ok=0
    fault_reason="camera_missing"
    recovery_state="degraded"
    CAMERA_MISSING_PREV=1
    CAMERA_RECOVERY_STABLE_COUNT=0
  elif (( RECOVERY_ACTION_TAKEN == 0 && CAMERA_MISSING_PREV == 1 )); then
    # The device is back; require two consecutive sightings before
    # restarting the B-side service.
    CAMERA_RECOVERY_STABLE_COUNT=$(( CAMERA_RECOVERY_STABLE_COUNT + 1 ))
    recovery_state="recovering"
    fault_reason="camera_recovered"
    if (( CAMERA_RECOVERY_STABLE_COUNT >= 2 )); then
      restart_bside_targeted "camera" "camera-reappeared" || true
      CAMERA_MISSING_PREV=0
      CAMERA_RECOVERY_STABLE_COUNT=0
    fi
  else
    CAMERA_RECOVERY_STABLE_COUNT=0
  fi

  # --- B-side service ------------------------------------------------------
  if (( RECOVERY_ACTION_TAKEN == 0 )) && ! { service_is_active "${B_SIDE_SERVICE}" && status_file_fresh "${B_SIDE_STATUS_FILE}" "${BLITZ_HEALTH_STALE_SEC}"; }; then
    bside_ok=0
    fault_reason="bside_status_stale"
    recovery_state="recovering"
    restart_bside_targeted "bside" "bside-unhealthy" || true
  fi

  # --- ROS receiver --------------------------------------------------------
  if (( RECOVERY_ACTION_TAKEN == 0 )) && ! ros_receiver_healthy "${BLITZ_HEALTH_STALE_SEC}"; then
    ros_ok=0
    fault_reason="ros_receiver_unhealthy"
    recovery_state="recovering"
    full_restart_stack "ros-unhealthy" || true
  fi

  # --- Report --------------------------------------------------------------
  watchdog_record_state_transition "${fault_reason}" "${recovery_state}"
  write_watchdog_status "${fault_reason}" "${recovery_state}" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}" "${gps_ok}" "${gps_device_present}"
  watchdog_append_sample "sample" "loop" "${fault_reason}" "${recovery_state}" "" "" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}" "${gps_ok}" "${gps_device_present}"
  sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}"
done
|
||||||
15
scripts/boot/boot-gate.sh
Normal file
15
scripts/boot/boot-gate.sh
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env bash
# Boot gate: sleeps for BLITZ_BOOT_DELAY_SEC seconds, logging before and
# after the delay.
set -o errexit -o nounset -o pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
. "${SCRIPT_DIR}/common.sh"

STEP="boot-gate"

blitz_load_boot_env

# Read the delay once so all log lines and the sleep use the same value.
boot_delay_sec="${BLITZ_BOOT_DELAY_SEC}"

blitz_log "${STEP}" "start" "start" "delay_sec=${boot_delay_sec}" 0
blitz_log "${STEP}" "delay" "start" "sleep ${boot_delay_sec}s before starting Blitz services" 0
sleep "${boot_delay_sec}"
blitz_log "${STEP}" "delay" "success" "boot gate released after ${boot_delay_sec}s" 0
|
||||||
661
scripts/boot/common.sh
Normal file
661
scripts/boot/common.sh
Normal file
@@ -0,0 +1,661 @@
|
|||||||
|
#!/usr/bin/env bash
# Strict mode: exit on error, on use of unset variables, and on failures
# anywhere in a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory containing this file.
BOOT_SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
# Absolute path of the sibling "dev" directory (../dev relative to this file).
DEV_SCRIPT_DIR="$(
  cd "${BOOT_SCRIPT_DIR}/../dev" && pwd
)"
|
||||||
|
|
||||||
|
# Source a file with `nounset` (set -u) temporarily disabled, for scripts
# that reference unset variables.
#
# Arguments:
#   $1  path of the file to source
# The caller's nounset setting is restored afterwards: it is re-enabled only
# if it was on when this function was called, so the helper no longer turns
# strict mode on for callers that had it off.
source_with_nounset_off() {
  local blitz_nounset_was_on=0

  if [[ "$-" == *u* ]]; then
    blitz_nounset_was_on=1
  fi

  set +u
  # shellcheck disable=SC1090
  source "$1"

  if (( blitz_nounset_was_on == 1 )); then
    set -u
  fi
}
|
||||||
|
|
||||||
|
# Print the host part of an address.
#
# Arguments:
#   $1  address in one of the forms "host", "host:port", "[host]:port" or
#       "[host]" (optional; empty or missing returns 1 and prints nothing)
# Bracketed forms have the brackets removed, with or without a trailing
# port; previously "[host]" without a port produced "[". Unbracketed input
# is cut at the first colon.
# NOTE(review): an unbracketed IPv6 literal is therefore truncated at its
# first colon — confirm callers always bracket IPv6 addresses.
blitz_host_from_addr() {
  local value="${1:-}"

  if [[ -z "${value}" ]]; then
    return 1
  fi

  if [[ "${value}" == \[*\]:* ]]; then
    value="${value#\[}"
    printf '%s\n' "${value%%]:*}"
    return 0
  fi

  if [[ "${value}" == \[*\] ]]; then
    value="${value#\[}"
    printf '%s\n' "${value%\]}"
    return 0
  fi

  printf '%s\n' "${value%%:*}"
}
|
||||||
|
|
||||||
|
blitz_load_boot_env() {
  # Load the boot-time environment exactly once per process and export every
  # BLITZ_* setting with its default.
  #
  # Order of precedence (later wins): dev load-env.sh, robot-boot.env,
  # robot-boot.env.local. Paths that still point at the dev log tree under
  # ${OMNISOCKETGO_ROOT}/logs are remapped to system locations.
  #
  # NOTE(review): local names are kept as-is on purpose; the sourced files run
  # inside this function's scope and could see them.
  local env_file
  local default_time_server
  local dev_run_root
  local dev_runtime_dir

  # Already loaded in this process: nothing to do.
  [[ "${BLITZ_BOOT_ENV_LOADED:-0}" != "1" ]] || return 0

  # Flag is visible to load-env.sh only while it is being sourced.
  export BLITZ_BOOT_LOADING_ENV="1"
  # shellcheck disable=SC1091
  source "${DEV_SCRIPT_DIR}/load-env.sh"
  unset BLITZ_BOOT_LOADING_ENV

  for env_file in "${BOOT_SCRIPT_DIR}/robot-boot.env" "${BOOT_SCRIPT_DIR}/robot-boot.env.local"; do
    [[ -f "${env_file}" ]] || continue
    # allexport: every assignment in the env file becomes an exported variable.
    set -a
    # shellcheck disable=SC1090
    source "${env_file}"
    set +a
  done

  # Optional hook; only called when some sourced file defined it.
  if declare -F normalize_loaded_env_vars >/dev/null 2>&1; then
    normalize_loaded_env_vars
  fi

  dev_run_root="${OMNISOCKETGO_ROOT}/logs"
  dev_runtime_dir="${dev_run_root}/runtime"

  # Remap unset values and dev-tree values to the system locations.
  if [[ -z "${BLITZ_RUN_ROOT:-}" || "${BLITZ_RUN_ROOT}" == "${dev_run_root}" ]]; then
    export BLITZ_RUN_ROOT="/var/log/blitz-robot"
  fi
  if [[ -z "${BLITZ_RUNTIME_DIR:-}" || "${BLITZ_RUNTIME_DIR}" == "${dev_runtime_dir}" ]]; then
    export BLITZ_RUNTIME_DIR="/run/blitz-robot"
  fi
  if [[ -z "${BLITZ_RUN_CONTEXT_FILE:-}" || "${BLITZ_RUN_CONTEXT_FILE}" == "${dev_runtime_dir}/run-context.env" ]]; then
    export BLITZ_RUN_CONTEXT_FILE="${BLITZ_RUNTIME_DIR}/run-context.env"
  fi
  if [[ -z "${BLITZ_RUN_ID_FILE:-}" || "${BLITZ_RUN_ID_FILE}" == "${dev_runtime_dir}/run-id" ]]; then
    export BLITZ_RUN_ID_FILE="${BLITZ_RUNTIME_DIR}/run-id"
  fi
  if [[ -z "${BLITZ_CURRENT_RUN_LINK:-}" || "${BLITZ_CURRENT_RUN_LINK}" == "${dev_run_root}/current" ]]; then
    export BLITZ_CURRENT_RUN_LINK="${BLITZ_RUN_ROOT}/current"
  fi

  # The time server defaults to the host part of the omnisocket server address
  # (empty when that address is unset).
  default_time_server="$(blitz_host_from_addr "${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}" || true)"

  # --- boot gate, run directories and logs ---
  export BLITZ_BOOT_DELAY_SEC="${BLITZ_BOOT_DELAY_SEC:-30}"
  export BLITZ_RUN_ROOT="${BLITZ_RUN_ROOT:-/var/log/blitz-robot}"
  export BLITZ_LOG_FILE="${BLITZ_LOG_FILE:-/var/log/blitz-robot/startup.log}"
  export BLITZ_RUNTIME_DIR="${BLITZ_RUNTIME_DIR:-/run/blitz-robot}"
  export BLITZ_RUN_CONTEXT_FILE="${BLITZ_RUN_CONTEXT_FILE:-${BLITZ_RUNTIME_DIR}/run-context.env}"
  export BLITZ_RUN_ID_FILE="${BLITZ_RUN_ID_FILE:-${BLITZ_RUNTIME_DIR}/run-id}"
  export BLITZ_CURRENT_RUN_LINK="${BLITZ_CURRENT_RUN_LINK:-${BLITZ_RUN_ROOT}/current}"

  # --- 5G modem dialing and routing ---
  export BLITZ_5G_DIAL_DIR="${BLITZ_5G_DIAL_DIR:-${BOOT_SCRIPT_DIR}}"
  export BLITZ_5G_SERIAL_PORT="${BLITZ_5G_SERIAL_PORT:-/dev/ttyUSB7}"
  export BLITZ_5G_INTERFACE="${BLITZ_5G_INTERFACE:-}"
  export BLITZ_5G_MODEM_SUBNET="${BLITZ_5G_MODEM_SUBNET:-192.168.224.0/22}"
  export BLITZ_5G_GATEWAY="${BLITZ_5G_GATEWAY:-192.168.225.1}"
  export BLITZ_5G_SKIP_DHCP="${BLITZ_5G_SKIP_DHCP:-0}"
  export BLITZ_5G_REMOVE_DEFAULT_ROUTE="${BLITZ_5G_REMOVE_DEFAULT_ROUTE:-1}"
  export BLITZ_5G_ROUTE_TARGETS="${BLITZ_5G_ROUTE_TARGETS:-106.55.173.235}"
  export BLITZ_5G_INFO_JSON="${BLITZ_5G_INFO_JSON:-${BLITZ_5G_DIAL_DIR}/modem_network_info.json}"
  export BLITZ_5G_DISABLE_INTERFACES="${BLITZ_5G_DISABLE_INTERFACES:-}"
  export BLITZ_5G_SERIAL_WAIT_SEC="${BLITZ_5G_SERIAL_WAIT_SEC:-60}"
  export BLITZ_5G_ROUTE_WAIT_SEC="${BLITZ_5G_ROUTE_WAIT_SEC:-30}"

  # --- services, watchdog and health ---
  export BLITZ_TIME_SERVER_IP="${BLITZ_TIME_SERVER_IP:-${default_time_server}}"
  export BLITZ_ROS_USER="${BLITZ_ROS_USER:-nvidia}"
  export BLITZ_ROS_SOCKET_WAIT_SEC="${BLITZ_ROS_SOCKET_WAIT_SEC:-20}"
  export BLITZ_WATCHDOG_INTERVAL_SEC="${BLITZ_WATCHDOG_INTERVAL_SEC:-5}"
  export BLITZ_HEALTH_STALE_SEC="${BLITZ_HEALTH_STALE_SEC:-15}"
  export BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="${BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC:-15}"

  # --- telemetry and JSONL logging ---
  export BLITZ_KCP_STATS_INTERVAL_MS="${BLITZ_KCP_STATS_INTERVAL_MS:-1000}"
  export BLITZ_CONTROL_LATENCY_LOG_ENABLED="${BLITZ_CONTROL_LATENCY_LOG_ENABLED:-1}"
  export BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD="${BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD:-100}"
  export BLITZ_5G_LINK_LOG_INTERVAL_SEC="${BLITZ_5G_LINK_LOG_INTERVAL_SEC:-5}"
  export BLITZ_JSONL_FLUSH_INTERVAL_MS="${BLITZ_JSONL_FLUSH_INTERVAL_MS:-1000}"
  export BLITZ_JSONL_FLUSH_BYTES="${BLITZ_JSONL_FLUSH_BYTES:-262144}"
  export BLITZ_JSONL_ROTATE_BYTES="${BLITZ_JSONL_ROTATE_BYTES:-134217728}"
  export BLITZ_JSONL_ROTATE_FILES="${BLITZ_JSONL_ROTATE_FILES:-8}"

  # --- incident capture and network recovery ---
  export BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC="${BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC:-5}"
  export BLITZ_INCIDENT_TOTAL_TIMEOUT_SEC="${BLITZ_INCIDENT_TOTAL_TIMEOUT_SEC:-30}"
  export BLITZ_NETWORK_FAIL_THRESHOLD="${BLITZ_NETWORK_FAIL_THRESHOLD:-3}"
  export BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="${BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC:-30}"

  # --- GPS monitoring ---
  export BLITZ_GPS_MONITOR_ENABLED="${BLITZ_GPS_MONITOR_ENABLED:-1}"
  export BLITZ_GPS_DEVICE_GLOB="${BLITZ_GPS_DEVICE_GLOB:-/dev/ttyCH341USB*}"
  export BLITZ_GPS_CHECK_INTERVAL_SEC="${BLITZ_GPS_CHECK_INTERVAL_SEC:-10}"
  export BLITZ_GPS_RESTART_UNITS="${BLITZ_GPS_RESTART_UNITS:-gpsd.socket gpsd.service}"

  export BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION="${BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION:-0}"

  # Mark as loaded so repeated calls (and child processes) short-circuit.
  export BLITZ_BOOT_ENV_LOADED="1"
}
|
||||||
|
|
||||||
|
blitz_timestamp() {
  # Print the current local time as "YYYY-MM-DD HH:MM:SS+ZZZZ".
  # %F and %T are the date(1) shorthands for %Y-%m-%d and %H:%M:%S.
  date '+%F %T%z'
}
|
||||||
|
|
||||||
|
blitz_sanitize_detail() {
  # Flatten a detail string ($1, default empty) onto one line for logging:
  # each newline becomes " ; " and each carriage return becomes a space.
  # The result is printed without a trailing newline.
  local detail="${1:-}"
  local newline=$'\n'
  local carriage_return=$'\r'

  detail="${detail//${newline}/ ; }"
  detail="${detail//${carriage_return}/ }"
  printf '%s' "${detail}"
}
|
||||||
|
|
||||||
|
blitz_log() {
  # Print one pipe-separated log record to stdout:
  #   timestamp | step | action | result | details | exit_code
  #
  # Arguments (all optional): step, action, result, details, exit_code.
  # The details field is flattened onto a single line before printing.
  local step="${1:-unknown-step}"
  local action="${2:-unknown-action}"
  local result="${3:-info}"
  local details="${4:-}"
  local exit_code="${5:-0}"
  local -a fields=(
    "$(blitz_timestamp)"
    "${step}"
    "${action}"
    "${result}"
    "$(blitz_sanitize_detail "${details}")"
    "${exit_code}"
  )

  printf '%s | %s | %s | %s | %s | %s\n' "${fields[@]}"
}
|
||||||
|
|
||||||
|
blitz_join_cmd() {
  # Print the arguments as one shell-quoted command line (no trailing
  # newline). Each argument is escaped with printf %q; the words are joined
  # with the first character of IFS, as "${array[*]}" does.
  local -a quoted=()
  local word
  local escaped

  for word in "$@"; do
    printf -v escaped '%q' "${word}"
    quoted+=("${escaped}")
  done
  printf '%s' "${quoted[*]}"
}
|
||||||
|
|
||||||
|
blitz_require_command() {
  # Check that command $1 is resolvable via `command -v` and log the outcome
  # under step $2 (default "precheck").
  # Returns 0 when found, 127 when missing.
  local command_name="$1"
  local step="${2:-precheck}"

  if ! command -v "${command_name}" >/dev/null 2>&1; then
    blitz_log "${step}" "require-command" "failure" "missing command: ${command_name}" 127
    return 127
  fi

  blitz_log "${step}" "require-command" "success" "command=${command_name}" 0
  return 0
}
|
||||||
|
|
||||||
|
blitz_require_file() {
  # Check that $1 is an existing regular file and log the outcome under
  # step $2 (default "precheck"). Returns 0 when present, 1 otherwise.
  local path="$1"
  local step="${2:-precheck}"

  if [[ ! -f "${path}" ]]; then
    blitz_log "${step}" "require-file" "failure" "missing file: ${path}" 1
    return 1
  fi

  blitz_log "${step}" "require-file" "success" "path=${path}" 0
  return 0
}
|
||||||
|
|
||||||
|
blitz_require_executable() {
  # Check that $1 passes the `-x` test and log the outcome under step $2
  # (default "precheck"). Returns 0 when executable, 1 otherwise.
  local path="$1"
  local step="${2:-precheck}"

  if [[ ! -x "${path}" ]]; then
    blitz_log "${step}" "require-executable" "failure" "missing executable: ${path}" 1
    return 1
  fi

  blitz_log "${step}" "require-executable" "success" "path=${path}" 0
  return 0
}
|
||||||
|
|
||||||
|
blitz_require_root() {
  # Check that the effective UID is 0 and log the outcome under step $1
  # (default "precheck"). Returns 0 for root, 1 otherwise.
  local step="${1:-precheck}"

  if (( EUID != 0 )); then
    blitz_log "${step}" "require-root" "failure" "root privileges are required" 1
    return 1
  fi

  blitz_log "${step}" "require-root" "success" "uid=${EUID}" 0
  return 0
}
|
||||||
|
|
||||||
|
blitz_run() {
  # Run a command with start/success/failure log records around it.
  #
  # Usage: blitz_run STEP ACTION COMMAND [ARGS...]
  # Returns the command's own exit status. The command is run as part of a
  # `||` list, so a failure does not trigger errexit here.
  local step="$1"
  local action="$2"
  local rc=0
  local rendered
  shift 2

  rendered="$(blitz_join_cmd "$@")"
  blitz_log "${step}" "${action}" "start" "${rendered}" 0

  "$@" || rc=$?

  if (( rc == 0 )); then
    blitz_log "${step}" "${action}" "success" "${rendered}" 0
    return 0
  fi

  blitz_log "${step}" "${action}" "failure" "${rendered}" "${rc}"
  return "${rc}"
}
|
||||||
|
|
||||||
|
blitz_route_ready() {
  # Succeed (and print the route) when `ip route get $1` resolves a usable
  # route, optionally requiring that it leaves through interface $2.
  #
  # Returns 1 when:
  #   - `ip route get` exits non-zero (previously its error text, e.g.
  #     "Error: ... Invalid argument", was accepted as a valid route),
  #   - the output is empty,
  #   - the output mentions "unreachable" or "prohibit",
  #   - an expected interface is given and the route does not use it.
  local target_ip="$1"
  local expected_interface="${2:-}"
  local route_output
  local rc=0

  route_output="$(ip route get "${target_ip}" 2>&1)" || rc=$?
  if (( rc != 0 )) || [[ -z "${route_output}" ]]; then
    return 1
  fi
  if [[ "${route_output}" == *"unreachable"* || "${route_output}" == *"prohibit"* ]]; then
    return 1
  fi
  # The interface name must appear as a whole word after "dev": followed by a
  # space or at the very end of the output.
  if [[ -n "${expected_interface}" && "${route_output}" != *" dev ${expected_interface} "* && "${route_output}" != *" dev ${expected_interface}" ]]; then
    return 1
  fi

  printf '%s\n' "${route_output}"
  return 0
}
|
||||||
|
|
||||||
|
blitz_interface_exists() {
  # Return 0 when `ip link show dev $1` succeeds, non-zero otherwise.
  # An empty or missing name returns 1 without calling `ip`.
  local interface_name="${1:-}"

  [[ -n "${interface_name}" ]] || return 1
  ip link show dev "${interface_name}" &>/dev/null
}
|
||||||
|
|
||||||
|
blitz_read_5g_info_interface() {
  # Print the "interface" value recorded in the modem info JSON file $1.
  #
  # Returns 1 (with no output) when the path is empty, the file is missing or
  # unparsable, the top-level JSON value is not an object, or the interface
  # field is empty. The non-object case previously raised AttributeError and
  # printed a traceback to stderr.
  local info_json="$1"

  if [[ -z "${info_json}" || ! -f "${info_json}" ]]; then
    return 1
  fi

  python3 - "${info_json}" <<'PY'
import json
import sys

path = sys.argv[1]

try:
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
except Exception:
    raise SystemExit(1)

# Valid JSON that is not an object (list, string, number) has no .get().
if not isinstance(payload, dict):
    raise SystemExit(1)

interface = str(payload.get("interface") or "").strip()
if not interface:
    raise SystemExit(1)

print(interface)
PY
}
|
||||||
|
|
||||||
|
blitz_detect_5g_interface_from_subnet() {
  # Print the name of the interface that holds an IPv4 address inside the
  # modem subnet ($1, default ${BLITZ_5G_MODEM_SUBNET}).
  #
  # When several interfaces match, the one with the best name prefix wins
  # (enx, then wwan, usb, eth, then anything else); ties break by name.
  # Returns 1 when the subnet is empty or invalid, `ip` fails, or nothing
  # matches.
  #
  # The embedded script no longer uses the `tuple[int, str]` annotation: it is
  # evaluated at definition time and raises TypeError on Python < 3.9, which
  # made detection fail on every call there.
  local modem_subnet="${1:-${BLITZ_5G_MODEM_SUBNET:-}}"

  if [[ -z "${modem_subnet}" ]]; then
    return 1
  fi

  python3 - "${modem_subnet}" <<'PY'
import ipaddress
import json
import subprocess
import sys

try:
    subnet = ipaddress.ip_network(sys.argv[1], strict=False)
except ValueError:
    # Malformed subnet: fail quietly like every other error path here.
    raise SystemExit(1)

skip = {"lo", "docker0", "l4tbr0"}

# (name prefix, rank); a lower rank is preferred.
PREFIX_RANK = (("enx", 0), ("wwan", 1), ("usb", 2), ("eth", 3))


def priority(name):
    """Return the (rank, name) sort key for an interface name."""
    for prefix, rank in PREFIX_RANK:
        if name.startswith(prefix):
            return (rank, name)
    return (9, name)


try:
    output = subprocess.check_output(["ip", "-j", "-4", "addr", "show"], text=True)
    payload = json.loads(output)
except Exception:
    raise SystemExit(1)

candidates = []
for item in payload:
    ifname = str(item.get("ifname") or "").strip()
    if not ifname or ifname in skip:
        continue
    for addr in item.get("addr_info") or []:
        if addr.get("family") != "inet":
            continue
        local = addr.get("local")
        prefixlen = addr.get("prefixlen")
        if not local or prefixlen is None:
            continue
        try:
            iface = ipaddress.ip_interface(f"{local}/{prefixlen}")
        except ValueError:
            continue
        if iface.ip in subnet:
            candidates.append((priority(ifname), ifname))
            # One matching address is enough for this interface.
            break

if not candidates:
    raise SystemExit(1)

candidates.sort(key=lambda item: item[0])
print(candidates[0][1])
PY
}
|
||||||
|
|
||||||
|
blitz_refresh_5g_info_json() {
  # Rewrite the modem info JSON ($2, default ${BLITZ_5G_INFO_JSON}) with the
  # current addresses of interface $1, as reported by `ip -j addr show`.
  #
  # The file gets the keys "interface", "ipv4" and "ipv6" (lists of
  # "address/prefixlen"). It is written to a temp file first and then moved
  # into place with os.replace. Returns 1 when an argument is empty or `ip`
  # yields nothing usable.
  local interface_name="$1"
  local info_json="${2:-${BLITZ_5G_INFO_JSON:-}}"

  if [[ -z "${interface_name}" || -z "${info_json}" ]]; then
    return 1
  fi

  python3 - "${interface_name}" "${info_json}" <<'PY'
import json
import os
import subprocess
import sys

interface_name, path = sys.argv[1], sys.argv[2]

try:
    raw = subprocess.check_output(["ip", "-j", "addr", "show", "dev", interface_name], text=True)
    links = json.loads(raw)
except Exception:
    raise SystemExit(1)

if not links:
    raise SystemExit(1)

# Bucket addresses by family; any other family is ignored.
by_family = {"inet": [], "inet6": []}
for addr in links[0].get("addr_info") or []:
    local = addr.get("local")
    prefixlen = addr.get("prefixlen")
    family = addr.get("family")
    if not local or prefixlen is None:
        continue
    bucket = by_family.get(family)
    if bucket is not None:
        bucket.append(f"{local}/{prefixlen}")

data = {
    "interface": interface_name,
    "ipv4": by_family["inet"],
    "ipv6": by_family["inet6"],
}

parent = os.path.dirname(path)
if parent:
    os.makedirs(parent, exist_ok=True)

temp_path = f"{path}.tmp.{os.getpid()}"
with open(temp_path, "w", encoding="utf-8") as handle:
    json.dump(data, handle, ensure_ascii=False, indent=2)
os.replace(temp_path, path)
PY
}
|
||||||
|
|
||||||
|
blitz_resolve_5g_interface() {
  # Print the name of the 5G network interface, trying in order:
  #   1. ${BLITZ_5G_INTERFACE}, when set (fails if it does not exist),
  #   2. the interface recorded in ${BLITZ_5G_INFO_JSON}, if it still exists,
  #   3. detection by modem subnet; a newly detected name is written back to
  #      the info JSON on a best-effort basis.
  # Returns 1 when none of these yields an interface.
  local explicit_interface="${BLITZ_5G_INTERFACE:-}"
  local info_json="${BLITZ_5G_INFO_JSON:-}"
  local recorded_interface=""
  local detected_interface=""

  # An explicit setting is authoritative: no fallback when it is missing.
  if [[ -n "${explicit_interface}" ]]; then
    blitz_interface_exists "${explicit_interface}" || return 1
    printf '%s\n' "${explicit_interface}"
    return 0
  fi

  recorded_interface="$(blitz_read_5g_info_interface "${info_json}" || true)"
  if [[ -n "${recorded_interface}" ]] && blitz_interface_exists "${recorded_interface}"; then
    printf '%s\n' "${recorded_interface}"
    return 0
  fi

  detected_interface="$(blitz_detect_5g_interface_from_subnet || true)"
  [[ -n "${detected_interface}" ]] || return 1

  if [[ "${detected_interface}" != "${recorded_interface}" ]]; then
    blitz_refresh_5g_info_json "${detected_interface}" "${info_json}" >/dev/null 2>&1 || true
  fi
  printf '%s\n' "${detected_interface}"
  return 0
}
|
||||||
|
|
||||||
|
blitz_prepare_runtime_dir() {
  # Create ${BLITZ_RUNTIME_DIR} and set its mode to 0775.
  # As root the directory is also chowned to root:${BLITZ_ROS_USER}; as any
  # other user the chmod is best-effort.
  local runtime_dir

  blitz_load_boot_env
  runtime_dir="${BLITZ_RUNTIME_DIR}"

  mkdir -p "${runtime_dir}"
  if (( EUID != 0 )); then
    chmod 0775 "${runtime_dir}" 2>/dev/null || true
  else
    chown "root:${BLITZ_ROS_USER}" "${runtime_dir}"
    chmod 0775 "${runtime_dir}"
  fi
  blitz_log "runtime-dir" "prepare" "success" "path=${runtime_dir}" 0
}
|
||||||
|
|
||||||
|
blitz_prepare_run_root() {
  # Create ${BLITZ_RUN_ROOT}/runs and ${BLITZ_RUN_ROOT}/incidents.
  # As root, the tree is chowned to root:${BLITZ_ROS_USER} and the three
  # directories are set to 0775; both steps are best-effort.
  local run_root
  local run_dir
  local incidents_dir

  blitz_load_boot_env
  run_root="${BLITZ_RUN_ROOT}"
  run_dir="${run_root}/runs"
  incidents_dir="${run_root}/incidents"

  mkdir -p "${run_dir}" "${incidents_dir}"
  if (( EUID == 0 )); then
    chown -R "root:${BLITZ_ROS_USER}" "${run_root}" 2>/dev/null || true
    chmod 0775 "${run_root}" "${run_dir}" "${incidents_dir}" 2>/dev/null || true
  fi
}
|
||||||
|
|
||||||
|
blitz_load_run_context_env() {
  # Source the run-context env file ($1, default ${BLITZ_RUN_CONTEXT_FILE})
  # with allexport on, so every variable it assigns is exported.
  # Returns 1 when the path is empty or the file does not exist.
  local context_file="${1:-${BLITZ_RUN_CONTEXT_FILE:-}}"

  [[ -n "${context_file}" && -f "${context_file}" ]] || return 1

  set -a
  # shellcheck disable=SC1090
  source "${context_file}"
  set +a
  return 0
}
|
||||||
|
|
||||||
|
blitz_read_run_id() {
  # Print the contents of ${BLITZ_RUN_ID_FILE} with every CR and LF removed.
  # Returns 1 when the variable is empty or the file does not exist.
  local run_id_file="${BLITZ_RUN_ID_FILE:-}"

  [[ -n "${run_id_file}" && -f "${run_id_file}" ]] || return 1
  tr -d '\r\n' < "${run_id_file}"
}
|
||||||
|
|
||||||
|
blitz_utc_compact_timestamp() {
  # Print the current UTC time in compact form, e.g. 20240131T235959Z.
  # TZ=UTC0 is the POSIX-documented equivalent of `date -u`.
  TZ=UTC0 date '+%Y%m%dT%H%M%SZ'
}
|
||||||
|
|
||||||
|
blitz_new_run_id() {
  # Print a new run id: the compact UTC timestamp of the moment of the call.
  local stamp

  # `|| true` keeps the original semantics: print whatever was produced.
  stamp="$(blitz_utc_compact_timestamp)" || true
  printf '%s\n' "${stamp}"
}
|
||||||
|
|
||||||
|
blitz_new_incident_id() {
  # Print an incident id of the form PREFIX-<utc timestamp>-<shell pid>.
  # PREFIX is $1 and defaults to "incident".
  local prefix="${1:-incident}"
  local stamp

  stamp="$(blitz_utc_compact_timestamp)" || true
  printf '%s-%s-%d\n' "${prefix}" "${stamp}" "$$"
}
|
||||||
|
|
||||||
|
blitz_new_instance_id() {
  # Print an instance id of the form <utc timestamp>-<shell pid>.
  local stamp

  stamp="$(blitz_utc_compact_timestamp)" || true
  printf '%s-%d\n' "${stamp}" "$$"
}
|
||||||
|
|
||||||
|
blitz_git_commit() {
  # Print the HEAD commit hash of the checkout at ${OMNISOCKETGO_ROOT}.
  # Always returns 0; git's error output is discarded.
  local repo_root="${OMNISOCKETGO_ROOT}"

  git -C "${repo_root}" rev-parse HEAD 2>/dev/null || return 0
}
|
||||||
|
|
||||||
|
blitz_git_dirty_flag() {
  # Print "0" when `git diff --quiet` reports no unstaged changes in the
  # checkout at ${OMNISOCKETGO_ROOT}, and "1" otherwise. "1" is also printed
  # when git itself fails (for example, the path is not a repository).
  local flag=1

  if git -C "${OMNISOCKETGO_ROOT}" diff --quiet --ignore-submodules=dirty >/dev/null 2>&1; then
    flag=0
  fi
  printf '%s\n' "${flag}"
}
|
||||||
|
|
||||||
|
blitz_write_run_context() {
  # Persist the context of a new run.
  #
  # Arguments: $1 run id, $2 run directory, $3 boot id.
  # Writes:
  #   ${BLITZ_RUN_ID_FILE}       - the run id on one line
  #   ${BLITZ_RUN_CONTEXT_FILE}  - shell-sourceable BLITZ_RUN_* assignments
  #   <run_dir>/run-info.json    - run metadata plus a snapshot of BLITZ_*,
  #                                OMNI_* and ROBOT_RECEIVER_* env variables
  #   ${BLITZ_CURRENT_RUN_LINK}  - symlink to the run directory
  #
  # Every file is written to a temp path and renamed into place. Values in the
  # context file are emitted with printf %q because the file is later loaded
  # with `source`: an unquoted value containing whitespace or shell
  # metacharacters would be mis-parsed or executed. Plain values are written
  # unchanged.
  local run_id="$1"
  local run_dir="$2"
  local boot_id="$3"
  local context_file="${BLITZ_RUN_CONTEXT_FILE}"
  local id_file="${BLITZ_RUN_ID_FILE}"
  local temp_context
  local temp_id
  local temp_info
  local commit_hash
  local dirty_flag
  local started_at

  commit_hash="$(blitz_git_commit)"
  dirty_flag="$(blitz_git_dirty_flag)"
  started_at="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  temp_context="${context_file}.tmp.$$"
  temp_id="${id_file}.tmp.$$"
  temp_info="${run_dir}/run-info.json.tmp.$$"

  mkdir -p "${run_dir}"

  # Write-then-rename so readers never see a partially written run id.
  printf '%s\n' "${run_id}" > "${temp_id}"
  mv -f "${temp_id}" "${id_file}"

  {
    printf 'BLITZ_RUN_ID=%q\n' "${run_id}"
    printf 'BLITZ_RUN_DIR=%q\n' "${run_dir}"
    printf 'BLITZ_BOOT_ID=%q\n' "${boot_id}"
    printf 'BLITZ_RUN_ROOT=%q\n' "${BLITZ_RUN_ROOT}"
  } > "${temp_context}"
  mv -f "${temp_context}" "${context_file}"

  python3 - "${temp_info}" "${run_id}" "${run_dir}" "${boot_id}" "${started_at}" "${commit_hash}" "${dirty_flag}" "${HOSTNAME:-$(hostname)}" <<'PY'
import json
import os
import sys

path, run_id, run_dir, boot_id, started_at, commit_hash, dirty_flag, hostname = sys.argv[1:9]
payload = {
    "run_id": run_id,
    "run_dir": run_dir,
    "boot_id": boot_id,
    "started_at": started_at,
    "hostname": hostname,
    "git_commit": commit_hash,
    "git_dirty": dirty_flag == "1",
    "env": {
        key: os.environ.get(key, "")
        for key in sorted(os.environ)
        if key.startswith(("BLITZ_", "OMNI_", "ROBOT_RECEIVER_"))
    },
}
with open(path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, ensure_ascii=False, indent=2, sort_keys=True)
PY
  mv -f "${temp_info}" "${run_dir}/run-info.json"
  ln -sfn "${run_dir}" "${BLITZ_CURRENT_RUN_LINK}"
}
|
||||||
|
|
||||||
|
blitz_init_run_context() {
  # Start a new run: prepare the runtime and log directories, generate a run
  # id, export BLITZ_RUN_ID / BLITZ_RUN_DIR / BLITZ_BOOT_ID, and persist the
  # run context to disk.
  local run_id
  local boot_id
  local run_dir

  blitz_load_boot_env
  blitz_prepare_runtime_dir
  blitz_prepare_run_root

  run_id="$(blitz_new_run_id)"
  # Use the kernel boot id; fall back to a timestamp when it is unreadable.
  boot_id="$(cat /proc/sys/kernel/random/boot_id 2>/dev/null || blitz_new_run_id)"
  run_dir="${BLITZ_RUN_ROOT}/runs/${run_id}"

  export BLITZ_RUN_ID="${run_id}" BLITZ_RUN_DIR="${run_dir}" BLITZ_BOOT_ID="${boot_id}"

  blitz_write_run_context "${run_id}" "${run_dir}" "${boot_id}"
  blitz_log "run-context" "init" "success" "run_id=${run_id} run_dir=${run_dir}" 0
}
|
||||||
|
|
||||||
|
blitz_require_run_context() {
  # Load the boot environment and then the run context file.
  # Logs a failure record and returns 1 when the context cannot be loaded.
  blitz_load_boot_env

  if ! blitz_load_run_context_env; then
    blitz_log "run-context" "load" "failure" "missing ${BLITZ_RUN_CONTEXT_FILE}" 1
    return 1
  fi
  return 0
}
|
||||||
|
|
||||||
|
blitz_ensure_instance_id() {
  # Export BLITZ_INSTANCE_ID, generating a new id only when it is not already
  # set to a non-empty value.
  if [[ -n "${BLITZ_INSTANCE_ID:-}" ]]; then
    return 0
  fi

  # Assign first, export second: `export VAR="$(cmd)"` reports the status of
  # `export`, hiding a failure of the generator (ShellCheck SC2155).
  BLITZ_INSTANCE_ID="$(blitz_new_instance_id)"
  export BLITZ_INSTANCE_ID
}
|
||||||
|
|
||||||
|
blitz_jsonl_rotate_if_needed() {
  # Rotate file $1 once it has reached the size limit.
  #
  # Arguments:
  #   $1 path       file to rotate
  #   $2 max_bytes  size threshold (default ${BLITZ_JSONL_ROTATE_BYTES}, else 0)
  #   $3 max_files  rotated copies kept (default ${BLITZ_JSONL_ROTATE_FILES}, else 0)
  #
  # Does nothing when the file is missing, a limit is <= 0, or the file is
  # smaller than max_bytes. Otherwise path.N is dropped for N == max_files,
  # each path.K moves to path.(K+1), and path becomes path.1.
  local path="$1"
  local max_bytes="${2:-${BLITZ_JSONL_ROTATE_BYTES:-0}}"
  local max_files="${3:-${BLITZ_JSONL_ROTATE_FILES:-0}}"
  local size=0
  local index

  [[ -n "${path}" && -f "${path}" ]] || return 0
  if (( max_bytes <= 0 || max_files <= 0 )); then
    return 0
  fi

  size="$(stat -c %s "${path}" 2>/dev/null || echo 0)"
  if (( size < max_bytes )); then
    return 0
  fi

  # Drop the oldest copy, then shift the rest up by one, oldest first.
  rm -f "${path}.${max_files}"
  for (( index = max_files - 1; index >= 1; index-- )); do
    if [[ -f "${path}.${index}" ]]; then
      mv -f "${path}.${index}" "${path}.$(( index + 1 ))"
    fi
  done
  mv -f "${path}" "${path}.1"
}
|
||||||
|
|
||||||
|
blitz_jsonl_append_line() {
  # Append line $2 (plus a newline) to file $1, creating the parent directory
  # if needed and rotating the file first when it has grown too large.
  local path="$1"
  local line="$2"
  local parent_dir

  parent_dir="$(dirname "${path}")"
  mkdir -p "${parent_dir}"
  blitz_jsonl_rotate_if_needed "${path}"
  printf '%s\n' "${line}" >> "${path}"
}
|
||||||
|
|
||||||
|
blitz_launch_incident_capture() {
  # Run blitz-incident-capture-launch.sh from the boot script directory,
  # passing all arguments through and discarding its output.
  # Returns 1 when the script is missing or exits non-zero.
  local launch_script="${BOOT_SCRIPT_DIR}/blitz-incident-capture-launch.sh"

  [[ -f "${launch_script}" ]] || return 1

  if ! /bin/bash "${launch_script}" "$@" &>/dev/null; then
    return 1
  fi
}
|
||||||
53
scripts/boot/disable-systemd.sh
Normal file
53
scripts/boot/disable-systemd.sh
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="disable"
|
||||||
|
SYSTEMD_DEST_DIR="/etc/systemd/system"
|
||||||
|
UNITS=(
|
||||||
|
"blitz-watchdog.service"
|
||||||
|
"blitz-5g-link-logger.service"
|
||||||
|
"blitz-b-side-omnid.service"
|
||||||
|
"blitz-ros-receiver.service"
|
||||||
|
"blitz-5g-dial.service"
|
||||||
|
"blitz-run-context.service"
|
||||||
|
"blitz-boot-gate.service"
|
||||||
|
"blitz-robot.target"
|
||||||
|
)
|
||||||
|
|
||||||
|
stop_unit_if_present() {
  # Stop systemd unit $1, but only when its unit file exists under
  # ${SYSTEMD_DEST_DIR}. A failing `systemctl stop` is logged and ignored.
  local unit_name="$1"
  local unit_path="${SYSTEMD_DEST_DIR}/${unit_name}"

  if [[ -f "${unit_path}" ]]; then
    blitz_run "${STEP}" "stop-unit" systemctl stop "${unit_name}" || true
  fi
}
|
||||||
|
|
||||||
|
disable_unit_if_present() {
  # Disable systemd unit $1, but only when its unit file exists under
  # ${SYSTEMD_DEST_DIR}. A failing `systemctl disable` is logged and ignored.
  local unit_name="$1"
  local unit_path="${SYSTEMD_DEST_DIR}/${unit_name}"

  if [[ -f "${unit_path}" ]]; then
    blitz_run "${STEP}" "disable-unit" systemctl disable "${unit_name}" || true
  fi
}
|
||||||
|
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_require_root "${STEP}"
|
||||||
|
blitz_require_command systemctl "${STEP}"
|
||||||
|
|
||||||
|
for unit_name in "${UNITS[@]}"; do
|
||||||
|
stop_unit_if_present "${unit_name}"
|
||||||
|
done
|
||||||
|
|
||||||
|
for unit_name in "${UNITS[@]}"; do
|
||||||
|
disable_unit_if_present "${unit_name}"
|
||||||
|
done
|
||||||
|
|
||||||
|
blitz_log "${STEP}" "complete" "success" "boot chain stopped and disabled; next reboot will not auto-start blitz services" 0
|
||||||
71
scripts/boot/install-systemd.sh
Normal file
71
scripts/boot/install-systemd.sh
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
SYSTEMD_TEMPLATE_DIR="${SCRIPT_DIR}/systemd"
|
||||||
|
SYSTEMD_DEST_DIR="/etc/systemd/system"
|
||||||
|
|
||||||
|
render_template() {
  # Render a systemd unit template.
  #
  # Arguments: $1 template path, $2 output path.
  # Replaces @OMNISOCKETGO_ROOT@, @BLITZ_LOG_FILE@ and @BLITZ_ROS_USER@ with
  # the values of the same-named variables.
  #
  # Values are escaped before being spliced into the sed replacement: a raw
  # "&" would expand to the matched placeholder, "|" would end the s|||
  # expression early, and "\" would start an escape sequence.
  local template_path="$1"
  local output_path="$2"
  # Prefix each of \ & | with a backslash.
  local escape_expr='s/[\\&|]/\\&/g'
  local root_value
  local log_value
  local user_value

  root_value="$(printf '%s' "${OMNISOCKETGO_ROOT}" | sed -e "${escape_expr}")"
  log_value="$(printf '%s' "${BLITZ_LOG_FILE}" | sed -e "${escape_expr}")"
  user_value="$(printf '%s' "${BLITZ_ROS_USER}" | sed -e "${escape_expr}")"

  sed \
    -e "s|@OMNISOCKETGO_ROOT@|${root_value}|g" \
    -e "s|@BLITZ_LOG_FILE@|${log_value}|g" \
    -e "s|@BLITZ_ROS_USER@|${user_value}|g" \
    "${template_path}" > "${output_path}"
}
|
||||||
|
|
||||||
|
install_unit() {
  # Render template $1 (a "*.in" file under ${SYSTEMD_TEMPLATE_DIR}) and
  # install it into ${SYSTEMD_DEST_DIR} without the ".in" suffix, mode 0644.
  #
  # The temp file is removed on every path. Previously a failing render or
  # install aborted the script under `set -e` and left the temp file behind.
  # On failure a log record is written and the failing status is returned.
  local template_name="$1"
  local unit_path="${SYSTEMD_DEST_DIR}/${template_name%.in}"
  local temp_output
  local rc=0

  temp_output="$(mktemp)"
  {
    render_template "${SYSTEMD_TEMPLATE_DIR}/${template_name}" "${temp_output}" &&
      install -m 0644 "${temp_output}" "${unit_path}"
  } || rc=$?
  rm -f "${temp_output}"

  if (( rc != 0 )); then
    blitz_log "install" "install-unit" "failure" "unit=${unit_path}" "${rc}"
    return "${rc}"
  fi
  blitz_log "install" "install-unit" "success" "unit=${unit_path}" 0
}
|
||||||
|
|
||||||
|
remove_unit_if_present() {
  # Remove unit $1 from ${SYSTEMD_DEST_DIR} when its unit file exists:
  # disable and stop it (best-effort, output discarded), delete the file, and
  # log the removal. Returns 0 without doing anything when the file is absent.
  local unit_name="$1"
  local unit_path="${SYSTEMD_DEST_DIR}/${unit_name}"

  [[ -f "${unit_path}" ]] || return 0

  systemctl disable --now "${unit_name}" &>/dev/null || true
  rm -f "${unit_path}"
  blitz_log "install" "remove-unit" "success" "unit=${unit_path}" 0
}
|
||||||
|
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_require_root "install"
|
||||||
|
blitz_require_command install "install"
|
||||||
|
blitz_require_command systemctl "install"
|
||||||
|
|
||||||
|
mkdir -p "${SYSTEMD_DEST_DIR}"
|
||||||
|
install -d -m 0755 "$(dirname "${BLITZ_LOG_FILE}")"
|
||||||
|
touch "${BLITZ_LOG_FILE}"
|
||||||
|
chmod 0644 "${BLITZ_LOG_FILE}"
|
||||||
|
blitz_log "install" "prepare-log-file" "success" "log_file=${BLITZ_LOG_FILE}" 0
|
||||||
|
blitz_prepare_runtime_dir
|
||||||
|
blitz_prepare_run_root
|
||||||
|
|
||||||
|
install_unit "blitz-boot-gate.service.in"
|
||||||
|
install_unit "blitz-run-context.service.in"
|
||||||
|
install_unit "blitz-5g-dial.service.in"
|
||||||
|
install_unit "blitz-5g-link-logger.service.in"
|
||||||
|
install_unit "blitz-ros-receiver.service.in"
|
||||||
|
install_unit "blitz-b-side-omnid.service.in"
|
||||||
|
install_unit "blitz-watchdog.service.in"
|
||||||
|
install_unit "blitz-robot.target.in"
|
||||||
|
remove_unit_if_present "blitz-time-sync.service"
|
||||||
|
|
||||||
|
blitz_run "install" "daemon-reload" systemctl daemon-reload
|
||||||
|
blitz_run "install" "enable-target" systemctl enable blitz-robot.target
|
||||||
|
blitz_log "install" "complete" "success" "run systemctl start blitz-robot.target to launch immediately" 0
|
||||||
9
scripts/boot/modem_network_info.json
Normal file
9
scripts/boot/modem_network_info.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"interface": "enxb8f72c9e179a",
|
||||||
|
"ipv4": [
|
||||||
|
"192.168.225.160/22"
|
||||||
|
],
|
||||||
|
"ipv6": [
|
||||||
|
"fe80::52ae:a1c8:a9bb:a9a8/64"
|
||||||
|
]
|
||||||
|
}
|
||||||
12
scripts/boot/prepare-runtime-dir.sh
Normal file
12
scripts/boot/prepare-runtime-dir.sh
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="runtime-dir"
|
||||||
|
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_prepare_runtime_dir
|
||||||
|
blitz_log "${STEP}" "complete" "success" "runtime_dir=${BLITZ_RUNTIME_DIR}" 0
|
||||||
854
scripts/boot/rndis_dial.py
Normal file
854
scripts/boot/rndis_dial.py
Normal file
@@ -0,0 +1,854 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""RM520N-GL RNDIS 自动拨号脚本。
|
||||||
|
|
||||||
|
流程:
|
||||||
|
1. 检测 USB 设备是否存在
|
||||||
|
2. 打开 AT 口并检查 SIM 状态
|
||||||
|
3. 配置 RNDIS 模式: AT+QCFG="usbnet",3
|
||||||
|
4. 重启模块: AT+CFUN=1,1
|
||||||
|
5. 等待模块重新枚举并识别 5G 网卡
|
||||||
|
6. 如果网卡还没有 IPv4, 自动尝试 DHCP
|
||||||
|
|
||||||
|
用法:
|
||||||
|
sudo python3 rndis_dial.py
|
||||||
|
sudo python3 rndis_dial.py --serial-port /dev/ttyUSB7
|
||||||
|
sudo python3 rndis_dial.py --interface eth0 #指定网口
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import errno
|
||||||
|
import ipaddress
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import select
|
||||||
|
import shlex
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import termios
|
||||||
|
import time
|
||||||
|
import tty
|
||||||
|
|
||||||
|
USB_ID = "2c7c:0801"
|
||||||
|
DEFAULT_SERIAL_PORT = "/dev/ttyUSB7" #串口设备节点
|
||||||
|
DEFAULT_BAUD_RATE = 115200
|
||||||
|
CHECK_INTERVAL = 2
|
||||||
|
SERIAL_READ_TIMEOUT = 0.2
|
||||||
|
SERIAL_POLL_INTERVAL = 0.1
|
||||||
|
SERIAL_SETTLE_DELAY = 0.3
|
||||||
|
AT_SYNC_RETRIES = 3
|
||||||
|
AT_SYNC_TIMEOUT = 2.5
|
||||||
|
# 示例地址 192.168.225.38/22 所在网段。
|
||||||
|
# 拨号成功后会用这个网段来最终确认哪个接口是 5G 模组。
|
||||||
|
DEFAULT_MODEM_SUBNET = "192.168.224.0/22"
|
||||||
|
DEFAULT_MODEM_GATEWAY = "192.168.225.1"
|
||||||
|
DEFAULT_PUBLIC_TARGETS = ("81.70.156.140", "106.55.173.235")
|
||||||
|
DEFAULT_INFO_JSON = "modem_network_info.json"
|
||||||
|
SKIP_INTERFACES = {"lo", "docker0", "l4tbr0"}
|
||||||
|
BAUD_RATE_MAP = {
|
||||||
|
9600: termios.B9600,
|
||||||
|
19200: termios.B19200,
|
||||||
|
38400: termios.B38400,
|
||||||
|
57600: termios.B57600,
|
||||||
|
115200: termios.B115200,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def run_cmd(cmd, timeout=30, check=False):
    """Run an external command and return ``(returncode, output)``.

    The command line is echoed to stdout first. ``output`` is stdout followed
    by stderr, with surrounding whitespace stripped.

    Args:
        cmd: Argument list passed to ``subprocess.run``.
        timeout: Seconds before ``subprocess.run`` raises ``TimeoutExpired``.
        check: When true, a non-zero exit status raises ``RuntimeError``.
    """
    print(f"[CMD] {format_shell_cmd(cmd)}")
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        timeout=timeout,
        check=False,
    )
    combined = f"{completed.stdout or ''}{completed.stderr or ''}"
    failed = completed.returncode != 0
    if check and failed:
        raise RuntimeError(f"命令执行失败: {' '.join(cmd)}\n{combined.strip()}")
    return completed.returncode, combined.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def format_shell_cmd(cmd):
    """Format an argument list as a readable, shell-quoted command line."""
    quoted_parts = map(shlex.quote, cmd)
    return " ".join(quoted_parts)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_ipv4_address(value):
    """Validate an IPv4 address string and return its canonical text form.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a valid IPv4 address.
    """
    try:
        parsed = ipaddress.IPv4Address(value)
    except ipaddress.AddressValueError as exc:
        raise argparse.ArgumentTypeError(f"无效的 IPv4 地址: {value}") from exc
    return str(parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def dedupe_keep_order(values):
    """Return ``values`` as a list with duplicates removed, first occurrence kept."""
    # dict preserves insertion order, so its keys are the de-duplicated sequence.
    return list(dict.fromkeys(values))
|
||||||
|
|
||||||
|
|
||||||
|
def require_root():
    """Exit with status 1 unless the effective user is root."""
    running_as_root = os.geteuid() == 0
    if running_as_root:
        return
    print("[FAIL] 请使用 sudo 运行此脚本")
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def require_commands():
    """Exit with status 1 if ``lsusb`` or ``ip`` is not on PATH."""
    missing = []
    for tool in ("lsusb", "ip"):
        if shutil.which(tool) is None:
            missing.append(tool)
    if not missing:
        return
    print(f"[FAIL] 缺少系统命令: {', '.join(missing)}")
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def usb_device_present():
    """Check via ``lsusb`` whether the system has enumerated the modem.

    Step 1 of the dial flow: confirm the module is recognized by the system.

    Returns:
        ``(True, matching_lsusb_line)`` when a line contains ``USB_ID``;
        otherwise ``(False, full_lsusb_output)``.
    """
    code, output = run_cmd(["lsusb"], timeout=10)
    if code == 0:
        matching = next((line for line in output.splitlines() if USB_ID in line), None)
        if matching is not None:
            return True, matching.strip()
    return False, output
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_usb_device(expected_present, timeout):
    """Wait for the modem's USB device to go away or to come back.

    Args:
        expected_present: ``True`` to wait for the device to appear,
            ``False`` to wait for it to disappear.
        timeout: Maximum number of seconds to wait.

    Returns:
        ``(True, detail)`` as soon as presence matches ``expected_present``;
        ``(False, detail_of_last_probe)`` once ``timeout`` has elapsed.
    """
    # Monotonic clock: the wall clock can be stepped by time sync while this
    # runs, which would shorten or stretch a time.time() based deadline.
    deadline = time.monotonic() + timeout
    while True:
        # Probe first so that even a zero timeout performs one check.
        present, detail = usb_device_present()
        if present == expected_present:
            return True, detail

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False, detail
        time.sleep(min(CHECK_INTERVAL, remaining))
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_path(path, timeout):
    """Wait for a serial node (or any other path) to exist.

    Args:
        path: Filesystem path to poll.
        timeout: Maximum number of seconds to wait.

    Returns:
        ``True`` as soon as ``path`` exists, ``False`` if it still does not
        exist after ``timeout`` seconds. The path is always checked at least
        once, so an already-present path is reported even with ``timeout=0``.
    """
    # Monotonic clock: immune to wall-clock steps (e.g. time sync at boot).
    deadline = time.monotonic() + timeout
    while True:
        if os.path.exists(path):
            return True

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Poll once per second, but never sleep past the deadline.
        time.sleep(min(1, remaining))
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_serial_output(text):
    """Tidy raw serial output so later keyword matching is easier.

    Carriage returns become newlines, blank lines are dropped and the
    result is stripped of leading/trailing whitespace.
    """
    unified = text.replace("\r", "\n")
    kept_lines = [line for line in unified.splitlines() if line.strip()]
    return "\n".join(kept_lines).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def serial_response_complete(text):
    """Return True once ``text`` contains an AT final result.

    A response counts as complete when some line is exactly ``OK`` (after
    stripping) or contains ``ERROR``.
    """
    if not text:
        return False
    return any(
        line.strip() == "OK" or "ERROR" in line
        for line in text.splitlines()
    )
|
||||||
|
|
||||||
|
|
||||||
|
class RawSerialSession:
    """Drive a Linux serial port with the standard library only.

    The port is opened non-blocking and switched to raw 8N1 mode, staying
    close to what ``stty raw`` would configure. Terminal-control failures
    (``termios.error``, which is *not* an ``OSError`` subclass) are surfaced
    as ``OSError`` from the I/O helpers so callers need a single except
    clause.
    """

    # Upper bound (seconds) on how long write() keeps retrying while the
    # kernel reports the output buffer as full.
    WRITE_TIMEOUT = 5.0

    def __init__(self, port, baudrate):
        """Open ``port`` and configure it for raw I/O at ``baudrate``.

        Raises:
            RuntimeError: unsupported baud rate, or the port could not be
                opened/configured (the descriptor is closed again first).
        """
        if baudrate not in BAUD_RATE_MAP:
            raise RuntimeError(f"不支持的波特率: {baudrate}")

        self.port = port
        self.fd = None
        self._original_attrs = None

        try:
            self.fd = os.open(port, os.O_RDWR | os.O_NOCTTY | os.O_NONBLOCK)
            self._original_attrs = termios.tcgetattr(self.fd)
            tty.setraw(self.fd, when=termios.TCSANOW)

            # attrs layout: [iflag, oflag, cflag, lflag, ispeed, ospeed, cc]
            attrs = termios.tcgetattr(self.fd)
            attrs[0] = 0
            attrs[1] = 0
            attrs[2] &= ~(termios.PARENB | termios.CSTOPB | termios.CSIZE)
            attrs[2] |= termios.CS8 | termios.CLOCAL | termios.CREAD
            attrs[3] = 0
            attrs[4] = BAUD_RATE_MAP[baudrate]
            attrs[5] = BAUD_RATE_MAP[baudrate]
            # VMIN=0 / VTIME=0: read() returns immediately with what is available.
            attrs[6][termios.VMIN] = 0
            attrs[6][termios.VTIME] = 0
            termios.tcsetattr(self.fd, termios.TCSANOW, attrs)
            termios.tcflush(self.fd, termios.TCIOFLUSH)
        except (OSError, termios.error) as exc:
            # termios.error must be caught explicitly, otherwise a node that
            # is not a tty would leak the descriptor and escape this handler.
            self.close()
            raise RuntimeError(f"无法打开串口 {port}: {exc}") from exc

    @staticmethod
    def _as_oserror(exc):
        """Convert a ``termios.error`` into an ``OSError`` keeping errno."""
        if len(exc.args) >= 2:
            return OSError(exc.args[0], exc.args[1])
        return OSError(str(exc))

    @property
    def is_open(self):
        """True while the session owns an open file descriptor."""
        return self.fd is not None

    def _flush_queue(self, queue):
        """Discard pending data in the given termios queue selector."""
        if self.fd is None:
            return
        try:
            termios.tcflush(self.fd, queue)
        except termios.error as exc:
            raise self._as_oserror(exc) from exc

    def reset_input_buffer(self):
        """Discard data received but not yet read. Raises OSError on failure."""
        self._flush_queue(termios.TCIFLUSH)

    def reset_output_buffer(self):
        """Discard data written but not yet transmitted. Raises OSError on failure."""
        self._flush_queue(termios.TCOFLUSH)

    def write(self, data):
        """Write all of ``data`` (bytes), retrying while the port is busy.

        Raises:
            OSError: the port is closed, the kernel accepted zero bytes, or
                the port stayed busy for longer than ``WRITE_TIMEOUT``.
        """
        if self.fd is None:
            raise OSError("串口未打开")

        deadline = time.monotonic() + self.WRITE_TIMEOUT
        sent = 0
        while sent < len(data):
            try:
                written = os.write(self.fd, data[sent:])
            except BlockingIOError:
                # Non-blocking fd with a full buffer: back off, but not forever.
                if time.monotonic() >= deadline:
                    raise OSError("串口写入超时") from None
                time.sleep(SERIAL_POLL_INTERVAL)
                continue
            if written <= 0:
                raise OSError("串口写入返回 0 字节")
            sent += written

    def flush(self):
        """Block until all written data has been transmitted. Raises OSError on failure."""
        if self.fd is None:
            return
        try:
            termios.tcdrain(self.fd)
        except termios.error as exc:
            raise self._as_oserror(exc) from exc

    def read_chunk(self, timeout, size=4096):
        """Wait up to ``timeout`` seconds and return up to ``size`` bytes.

        Returns ``b""`` when the port is closed, nothing arrived in time, or
        the read would block.
        """
        if self.fd is None:
            return b""

        ready, _, _ = select.select([self.fd], [], [], timeout)
        if not ready:
            return b""

        try:
            return os.read(self.fd, size)
        except BlockingIOError:
            return b""

    def close(self):
        """Restore the original terminal attributes (best effort) and close the fd."""
        if self.fd is None:
            return

        fd = self.fd
        self.fd = None

        if self._original_attrs is not None:
            try:
                termios.tcsetattr(fd, termios.TCSANOW, self._original_attrs)
            except termios.error:
                # The device may already be gone (e.g. modem reboot); closing
                # the descriptor is all that is left to do.
                pass
        os.close(fd)
|
||||||
|
|
||||||
|
|
||||||
|
def read_serial_output(session, timeout, allow_disconnect=False):
    """Read an AT response until a final result appears or time runs out.

    Reading stops early once a terminal line (``OK`` / ``ERROR``) has been
    seen and no further data arrived for ``SERIAL_SETTLE_DELAY`` seconds.

    Args:
        session: Object exposing ``read_chunk(timeout=...)`` returning bytes.
        timeout: Overall time budget in seconds.
        allow_disconnect: When true, EIO/ENODEV/EBADF read errors end the
            read quietly instead of raising (the device may be rebooting).

    Returns:
        The normalized text received so far (possibly empty).

    Raises:
        RuntimeError: a read error occurred that is not tolerated.
    """
    # Monotonic clock so a wall-clock step cannot distort the time budget.
    deadline = time.monotonic() + timeout
    # Collect raw bytes and decode the whole buffer, so a character split
    # across two chunks is not dropped by errors="ignore".
    received = bytearray()
    saw_terminal_line = False
    last_data_time = None

    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break

        try:
            chunk = session.read_chunk(timeout=min(SERIAL_READ_TIMEOUT, remaining))
        except OSError as exc:
            if allow_disconnect and exc.errno in (errno.EIO, errno.ENODEV, errno.EBADF):
                break
            raise RuntimeError(f"读取串口响应失败: {exc}") from exc

        if chunk:
            received.extend(chunk)
            last_data_time = time.monotonic()
            current_text = normalize_serial_output(received.decode(errors="ignore"))
            if serial_response_complete(current_text):
                saw_terminal_line = True
            # More data may be pending; read again right away.
            continue

        quiet_long_enough = (
            last_data_time is not None
            and time.monotonic() - last_data_time >= SERIAL_SETTLE_DELAY
        )
        if saw_terminal_line and quiet_long_enough:
            break

        time.sleep(SERIAL_POLL_INTERVAL)

    return normalize_serial_output(received.decode(errors="ignore"))
|
||||||
|
|
||||||
|
|
||||||
|
def open_serial_session(port):
    """Open an AT serial session used to send several commands in a row.

    After opening, waits briefly and discards anything pending in both
    directions so the first command starts from a clean state.

    Returns:
        The open ``RawSerialSession``.

    Raises:
        RuntimeError: the port could not be opened (from ``RawSerialSession``).
        Any error raised while flushing is re-raised after the port has been
        closed again, so the descriptor is never leaked.
    """
    ser = RawSerialSession(port=port, baudrate=DEFAULT_BAUD_RATE)
    try:
        time.sleep(0.2)
        ser.reset_input_buffer()
        ser.reset_output_buffer()
    except BaseException:
        # The caller never receives `ser` on this path, so close it here.
        ser.close()
        raise
    return ser
|
||||||
|
|
||||||
|
|
||||||
|
def execute_serial_step(ser, command, expect=None, timeout=3, allow_disconnect=False):
    """Send one AT command on the current session and validate the reply.

    Args:
        ser: Open serial session.
        command: AT command text without the trailing carriage return.
        expect: Substring that must appear in the reply, or ``None``.
        timeout: Seconds to wait for the reply.
        allow_disconnect: Tolerate the device vanishing while reading and
            skip the ``expect`` check (used for reboot commands).

    Returns:
        The normalized reply text (may be empty).

    Raises:
        RuntimeError: sending failed, the reply contains ``ERROR``, or the
            expected substring is missing.
    """
    print(f"[AT] {command}")

    try:
        ser.reset_input_buffer()
        ser.write((command + "\r").encode())
        ser.flush()
    except (OSError, termios.error) as exc:
        # tcflush/tcdrain report failures as termios.error, which is not an
        # OSError subclass, so it has to be listed explicitly.
        raise RuntimeError(f"AT 命令 `{command}` 发送失败: {exc}") from exc

    response = read_serial_output(ser, timeout=timeout, allow_disconnect=allow_disconnect)
    print(response if response else "(无响应)")

    if "ERROR" in response:
        raise RuntimeError(f"AT 命令 `{command}` 执行失败: {response}")
    if expect and expect not in response and not allow_disconnect:
        raise RuntimeError(f"AT 命令 `{command}` 响应异常: {response or '空响应'}")
    return response
|
||||||
|
|
||||||
|
|
||||||
|
def synchronize_at_channel(ser):
    """Warm up the AT port with plain ``AT`` until it answers ``OK``.

    Some modems need a first ``AT`` right after the port is opened. Up to
    ``AT_SYNC_RETRIES`` attempts are made.

    Raises:
        RuntimeError: no attempt produced ``OK``.
    """
    last_error = None
    attempt = 0

    while attempt < AT_SYNC_RETRIES:
        attempt += 1
        try:
            print(f"[INFO] 预热 AT 通道,第 {attempt} 次")
            response = execute_serial_step(ser, "AT", expect="OK", timeout=AT_SYNC_TIMEOUT)
        except RuntimeError as exc:
            last_error = exc
            time.sleep(0.5)
            continue
        if "OK" in response:
            return

    if last_error is None:
        raise RuntimeError("AT 通道预热失败")
    raise RuntimeError(
        "AT 通道预热失败,请确认串口是否是 AT 命令口,例如 /dev/ttyUSB2"
    ) from last_error
|
||||||
|
|
||||||
|
|
||||||
|
def run_serial_steps(port, steps):
    """Run several AT commands in order within a single serial session.

    Args:
        port: Serial device path.
        steps: Iterable of dicts with key ``command`` and optional keys
            ``expect``, ``timeout`` (default 3) and ``allow_disconnect``
            (default False).

    The session is always closed, whether or not a step fails.
    """
    ser = open_serial_session(port)
    try:
        synchronize_at_channel(ser)
        for step in steps:
            options = {
                "expect": step.get("expect"),
                "timeout": step.get("timeout", 3),
                "allow_disconnect": step.get("allow_disconnect", False),
            }
            execute_serial_step(ser, step["command"], **options)
    finally:
        if ser.is_open:
            ser.close()
|
||||||
|
|
||||||
|
def configure_rndis(port):
    """Switch the modem to RNDIS mode and trigger a module reboot.

    Step 2 of the dial flow: all dial-related AT commands are sent in order
    within one serial session.

    Raises:
        RuntimeError: the serial node did not appear within 30 seconds, or
            an AT step failed.
    """
    port_available = wait_for_path(port, timeout=30)
    if not port_available:
        raise RuntimeError(f"串口不存在: {port}")

    print(f"[OK] 串口已打开: {port}")

    dial_steps = [
        {"command": "AT+CPIN?", "expect": "READY", "timeout": 4},
        {"command": 'AT+QCFG="usbnet",3', "expect": "OK", "timeout": 5},
        # The reboot command may drop the port before any reply arrives.
        {"command": "AT+CFUN=1,1", "timeout": 4, "allow_disconnect": True},
    ]
    run_serial_steps(port, dial_steps)
|
||||||
|
|
||||||
|
|
||||||
|
def get_interfaces():
    """List network interfaces, leaving out obviously unrelated local ones.

    Returns a sorted list of names from ``/sys/class/net`` that are neither
    in ``SKIP_INTERFACES`` nor USB gadget interfaces; an empty list when
    that directory does not exist.
    """
    try:
        names = os.listdir("/sys/class/net")
    except FileNotFoundError:
        return []
    return sorted(
        name
        for name in names
        if name not in SKIP_INTERFACES and not is_usb_gadget(name)
    )
|
||||||
|
|
||||||
|
|
||||||
|
def is_usb_gadget(iface):
    """Return True for gadget NICs that the Jetson itself exposes.

    Detection: the resolved sysfs path of the interface contains ``/gadget/``.
    """
    sysfs_path = f"/sys/class/net/{iface}"
    if os.path.exists(sysfs_path):
        resolved = os.path.realpath(sysfs_path)
        return "/gadget/" in resolved
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_usb_network_interface(iface):
    """Return True when the interface is backed by a USB device.

    Detection: the resolved ``/sys/class/net/<iface>/device`` path contains
    ``/usb``. Interfaces without a ``device`` entry are not USB.
    """
    device_path = f"/sys/class/net/{iface}/device"
    if os.path.exists(device_path):
        return "/usb" in os.path.realpath(device_path)
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_ipv4_addrs():
    """Return IPv4 addresses (as printed by ``ip``, normally CIDR) per interface.

    Returns a dict mapping interface name to a list of addresses; empty
    when the ``ip`` command fails.
    """
    code, output = run_cmd(["ip", "-o", "-4", "addr", "show"], timeout=10)
    addrs_by_iface = {}
    if code != 0:
        return addrs_by_iface

    for fields in map(str.split, output.splitlines()):
        if len(fields) < 4:
            continue
        # One-line format: "<index>: <iface> inet <addr>/<prefix> ..."
        addrs_by_iface.setdefault(fields[1], []).append(fields[3])
    return addrs_by_iface
|
||||||
|
|
||||||
|
|
||||||
|
def get_ipv6_addrs():
    """Return IPv6 addresses (as printed by ``ip``, normally CIDR) per interface.

    Returns a dict mapping interface name to a list of addresses; empty
    when the ``ip`` command fails.
    """
    code, output = run_cmd(["ip", "-o", "-6", "addr", "show"], timeout=10)
    addrs_by_iface = {}
    if code != 0:
        return addrs_by_iface

    for fields in map(str.split, output.splitlines()):
        if len(fields) < 4:
            continue
        # One-line format: "<index>: <iface> inet6 <addr>/<prefix> ..."
        addrs_by_iface.setdefault(fields[1], []).append(fields[3])
    return addrs_by_iface
|
||||||
|
|
||||||
|
|
||||||
|
def interface_priority(iface):
    """Rank an interface name; lower values are tried first.

    ``wwan*`` -> 0, ``enx*`` -> 1, ``usb*`` -> 2, anything else -> 10.
    """
    ranked_prefixes = (("wwan", 0), ("enx", 1), ("usb", 2))
    for prefix, rank in ranked_prefixes:
        if iface.startswith(prefix):
            return rank
    return 10
|
||||||
|
|
||||||
|
|
||||||
|
def list_usb_network_candidates(explicit_iface=None):
    """List USB NICs worth trying before an address has been obtained.

    The modem is not identified by a fixed interface name here; this only
    narrows the field while there is no IP yet. The real confirmation
    happens after DHCP, based on the IP subnet.

    Args:
        explicit_iface: When set, only this interface name is considered.

    Returns:
        Interface names ordered by ``interface_priority`` then by name.
    """
    eligible = [
        iface
        for iface in get_interfaces()
        if not (explicit_iface and iface != explicit_iface)
        and is_usb_network_interface(iface)
    ]
    return sorted(eligible, key=lambda iface: (interface_priority(iface), iface))
|
||||||
|
|
||||||
|
|
||||||
|
def ip_in_subnet(ip_cidr, subnet):
    """Return True when an interface address lies inside ``subnet``.

    Unparseable input yields False instead of raising.
    """
    try:
        address = ipaddress.ip_interface(ip_cidr).ip
        network = ipaddress.ip_network(subnet, strict=False)
    except ValueError:
        return False
    return address in network
|
||||||
|
|
||||||
|
|
||||||
|
def find_interface_by_subnet(modem_subnet, explicit_iface=None):
    """After dialing, identify the 5G modem NIC by its IP subnet.

    Args:
        modem_subnet: Subnet the modem hands out addresses from.
        explicit_iface: When set, only this interface name is considered.

    Returns:
        ``(iface, matching_addresses)`` for the best-ranked USB interface
        holding an address in ``modem_subnet``, or ``(None, [])``.
    """
    matches = []
    for iface, addrs in get_ipv4_addrs().items():
        if iface in SKIP_INTERFACES or is_usb_gadget(iface):
            continue
        if not is_usb_network_interface(iface):
            continue
        if explicit_iface and iface != explicit_iface:
            continue

        in_subnet = [addr for addr in addrs if ip_in_subnet(addr, modem_subnet)]
        if in_subnet:
            matches.append((interface_priority(iface), iface, in_subnet))

    if not matches:
        return None, []

    # Lowest priority value wins; the name breaks ties.
    _, best_iface, best_addrs = min(matches, key=lambda entry: entry[:2])
    return best_iface, best_addrs
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_usb_candidates(explicit_iface=None, timeout=90):
    """Wait for the modem to enumerate at least one USB NIC candidate.

    Args:
        explicit_iface: When set, only this interface name is accepted.
        timeout: Maximum number of seconds to wait.

    Returns:
        The candidate list as soon as it is non-empty, or ``[]`` on timeout.
    """
    # Monotonic clock: immune to wall-clock steps (e.g. time sync at boot).
    deadline = time.monotonic() + timeout
    while True:
        # Probe first so that even a zero timeout performs one check.
        candidates = list_usb_network_candidates(explicit_iface=explicit_iface)
        if candidates:
            return candidates

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return []
        time.sleep(min(CHECK_INTERVAL, remaining))
|
||||||
|
|
||||||
|
|
||||||
|
def bring_interface_up(iface):
    """Set the interface administratively up via ``ip link``.

    Raises:
        RuntimeError: the ``ip`` command returned a non-zero status.
    """
    link_up_cmd = ["ip", "link", "set", "dev", iface, "up"]
    code, output = run_cmd(link_up_cmd, timeout=10)
    if code == 0:
        return
    raise RuntimeError(f"拉起网卡失败: {iface}\n{output}")
|
||||||
|
|
||||||
|
|
||||||
|
def renew_dhcp(iface):
    """Request an IPv4 lease for ``iface`` with dhclient or udhcpc.

    ``dhclient`` is preferred; ``udhcpc`` is used when dhclient is absent.

    Returns:
        ``(ok, output)``: ``ok`` is True when the DHCP client exited with
        status 0. A missing client or a client that exceeds the 45 second
        limit is reported as ``(False, message)`` rather than raised, so the
        caller can move on to another interface.
    """
    if shutil.which("dhclient"):
        client = "dhclient"
        cmd = ["dhclient", "-1", "-v", iface]
    elif shutil.which("udhcpc"):
        client = "udhcpc"
        cmd = ["udhcpc", "-n", "-q", "-i", iface]
    else:
        return False, "系统中未找到 dhclient 或 udhcpc"

    print(f"[INFO] 使用 {client} 为 {iface} 获取 IP")
    try:
        code, output = run_cmd(cmd, timeout=45)
    except subprocess.TimeoutExpired as exc:
        # Keep the (ok, output) contract instead of aborting the whole dial.
        return False, f"{client} 在 {exc.timeout} 秒内未完成"
    return code == 0, output
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_routes(iface):
    """Return the default-route lines ``ip`` reports for ``iface``.

    Each entry is one stripped, non-empty output line; the list is empty
    when the command fails or there is no default route.
    """
    show_cmd = ["ip", "-o", "route", "show", "default", "dev", iface]
    code, output = run_cmd(show_cmd, timeout=10)
    if code != 0:
        return []

    routes = []
    for line in output.splitlines():
        entry = line.strip()
        if entry:
            routes.append(entry)
    return routes
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_gateway(iface, fallback_gateway):
    """Return the gateway of the interface's default route.

    The token following ``via`` in the first default route that has one is
    used; ``fallback_gateway`` is returned when none is found.
    """
    for route in get_default_routes(iface):
        tokens = route.split()
        # Pair every token with its successor to find "via <gateway>".
        for token, following in zip(tokens, tokens[1:]):
            if token != "via":
                continue
            print(f"[INFO] 从默认路由检测到 {iface} 网关: {following}")
            return following

    print(f"[INFO] 未从默认路由检测到 {iface} 网关,回退到 {fallback_gateway}")
    return fallback_gateway
|
||||||
|
|
||||||
|
|
||||||
|
def delete_default_routes(iface, max_rounds=32):
    """Delete every default route bound to ``iface``.

    One route is removed per round and the route table is re-read before
    the next round, so a stale listing never causes a spurious failure.

    Args:
        iface: Interface whose default routes are removed.
        max_rounds: Upper bound on deletions. It stops the loop if something
            else (for example a DHCP client) keeps re-adding the route.

    Returns:
        Number of routes deleted.

    Raises:
        RuntimeError: a deletion failed, or default routes are still present
            after ``max_rounds`` deletions.
    """
    removed = 0

    for _ in range(max_rounds):
        routes = get_default_routes(iface)
        if not routes:
            return removed

        # Try the exact route spec first; fall back to the generic form when
        # `ip` rejects some attribute of the printed line.
        code, output = run_cmd(["ip", "route", "del", *routes[0].split()], timeout=10)
        if code != 0:
            code, output = run_cmd(["ip", "route", "del", "default", "dev", iface], timeout=10)
        if code != 0:
            raise RuntimeError(f"删除默认路由失败: {iface}\n{output}")
        removed += 1

    if get_default_routes(iface):
        raise RuntimeError(f"未能删除 {iface} 的默认路由")
    return removed
|
||||||
|
|
||||||
|
|
||||||
|
def install_host_routes(iface, gateway, targets):
    """Install a /32 host route via ``gateway`` on ``iface`` for each target.

    Duplicate targets are handled once. ``ip route replace`` is used, so an
    existing route for the same target is overwritten.

    Raises:
        RuntimeError: adding a route failed.
    """
    unique_targets = dedupe_keep_order(targets)
    for target in unique_targets:
        replace_cmd = ["ip", "route", "replace", f"{target}/32", "via", gateway, "dev", iface]
        code, output = run_cmd(replace_cmd, timeout=10)
        if code != 0:
            raise RuntimeError(f"添加主机路由失败: {target} via {gateway} dev {iface}\n{output}")

        print(f"[OK] 已添加主机路由: {target}/32 via {gateway} dev {iface}")
|
||||||
|
|
||||||
|
|
||||||
|
def enforce_route_policy(iface, fallback_gateway, route_targets):
    """Drop the interface's default routes and keep only explicit host routes.

    The gateway is resolved *before* the default routes are deleted, because
    it is read from those routes.
    """
    gateway = resolve_gateway(iface, fallback_gateway)
    removed = delete_default_routes(iface)
    print(f"[OK] 已删除 {iface} 上的 {removed} 条默认路由")

    if not route_targets:
        print(f"[WARN] {iface} 未配置任何主机路由目标,5G 将不再承载公网流量")
        return
    install_host_routes(iface, gateway, route_targets)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_ipv4(iface):
    """Make sure ``iface`` has an IPv4 address, requesting one if needed.

    Returns:
        The interface's IPv4 addresses; an empty list when DHCP failed.
    """
    existing = get_ipv4_addrs().get(iface, [])
    if existing:
        return existing

    bring_interface_up(iface)
    ok, output = renew_dhcp(iface)
    if output:
        print(output)

    return get_ipv4_addrs().get(iface, []) if ok else []
|
||||||
|
|
||||||
|
|
||||||
|
def acquire_modem_interface(modem_subnet, explicit_iface=None):
    """Identify the real modem interface through DHCP plus IP subnet.

    If no interface already holds an address in ``modem_subnet``, each USB
    NIC candidate is given a chance to obtain one. A candidate that cannot
    be brought up or whose DHCP attempt times out is reported and skipped,
    so one bad interface does not abort the search.

    Returns:
        ``(iface, matching_addresses)`` or ``(None, [])`` when no candidate
        ended up inside ``modem_subnet``.

    Raises:
        RuntimeError: there is no USB NIC candidate at all.
    """
    iface, matched_addrs = find_interface_by_subnet(
        modem_subnet,
        explicit_iface=explicit_iface,
    )
    if iface:
        return iface, matched_addrs

    candidates = list_usb_network_candidates(explicit_iface=explicit_iface)
    if not candidates:
        raise RuntimeError("未找到可尝试 DHCP 的 USB 网卡候选项")

    print(f"[INFO] 当前 USB 网卡候选项: {', '.join(candidates)}")

    for candidate in candidates:
        print(f"[INFO] 尝试为 {candidate} 获取 IPv4")
        try:
            ensure_ipv4(candidate)
        except (RuntimeError, subprocess.TimeoutExpired) as exc:
            print(f"[WARN] {candidate} 获取 IPv4 失败: {exc}")
            continue

        matched_iface, matched_addrs = find_interface_by_subnet(
            modem_subnet,
            explicit_iface=explicit_iface,
        )
        if matched_iface:
            return matched_iface, matched_addrs

    return None, []
|
||||||
|
|
||||||
|
|
||||||
|
def print_interface_status(iface):
    """Print ``ip`` (and, when installed, ``ifconfig``) details for ``iface``.

    Step 3 of the dial flow: after dialing succeeds, show the modem NIC and
    its addresses for confirmation.
    """
    print(f"[OK] 检测到 5G 网卡: {iface}")

    code, ip_output = run_cmd(["ip", "-4", "addr", "show", "dev", iface], timeout=10)
    if code == 0 and ip_output:
        print(ip_output)

    if not shutil.which("ifconfig"):
        return
    code, ifconfig_output = run_cmd(["ifconfig", iface], timeout=10)
    if code == 0 and ifconfig_output:
        print("\n===== ifconfig =====")
        print(ifconfig_output)
|
||||||
|
|
||||||
|
|
||||||
|
def save_interface_info(iface, output_file=DEFAULT_INFO_JSON):
    """Save the interface name with its IPv4 and IPv6 addresses as JSON.

    Args:
        iface: Interface to describe.
        output_file: Destination path (UTF-8, overwritten).
    """
    ipv4 = get_ipv4_addrs().get(iface, [])
    ipv6 = get_ipv6_addrs().get(iface, [])
    payload = {"interface": iface, "ipv4": ipv4, "ipv6": ipv6}

    with open(output_file, "w", encoding="utf-8") as json_file:
        json.dump(payload, json_file, ensure_ascii=False, indent=2)

    print(f"[OK] 网口信息已保存到 {output_file}")
|
||||||
|
|
||||||
|
|
||||||
|
def ping_target(iface, target, count=3, timeout=15):
    """Ping ``target`` through the given interface.

    Args:
        iface: Interface passed to ``ping -I``.
        target: Host or address to ping.
        count: Number of echo requests.
        timeout: Overall limit in seconds for the ping process.

    Returns:
        ``(ok, output)`` where ``ok`` is True when ping exited with 0.
    """
    ping_cmd = ["ping", "-I", iface, "-c", str(count), "-W", "3", target]
    code, output = run_cmd(ping_cmd, timeout=timeout)
    succeeded = code == 0
    return succeeded, output
|
||||||
|
|
||||||
|
|
||||||
|
def print_ping_summary(output):
    """Print only the key result lines of a ping run.

    A line is kept when it contains the packet statistics, the rtt summary
    or a ``Destination ...`` diagnostic.
    """
    markers = ("packets transmitted", "rtt ", "Destination ")
    for line in output.splitlines():
        if any(marker in line for marker in markers):
            print(line)
|
||||||
|
|
||||||
|
|
||||||
|
def verify_connectivity(iface, gateway=DEFAULT_MODEM_GATEWAY, targets=DEFAULT_PUBLIC_TARGETS, retry_interval=3, max_wait=45):
    """Ping the modem gateway first, then poll the public targets.

    Step 4 of the dial flow.

    Args:
        iface: Interface to ping through.
        gateway: Modem gateway address checked first.
        targets: Public addresses tried in order on every round.
        retry_interval: Seconds to sleep between rounds.
        max_wait: Seconds after which no further round is started.

    Returns:
        True as soon as one public target answers; False when the gateway
        is unreachable or no target answered within ``max_wait``.
    """
    ok, output = ping_target(iface, gateway, count=3, timeout=15)
    if not ok:
        print(f"[WARN] {iface} 无法到达模组网关 {gateway}")
        if output:
            print(output)
        return False

    print(f"[OK] {iface} 可到达模组网关 {gateway}")
    print_ping_summary(output)

    # Monotonic clock: a wall-clock step (time sync right after the link
    # comes up) must not cut the retry window short or stretch it.
    deadline = time.monotonic() + max_wait
    attempt = 1
    while True:
        for target in targets:
            ok, output = ping_target(iface, target, count=3, timeout=15)
            if ok:
                print(f"[OK] {iface} 可通过 {target}")
                print_ping_summary(output)
                return True

            print(f"[WARN] 第 {attempt} 次 Ping {target} 失败")
            if output:
                print_ping_summary(output)

        if time.monotonic() >= deadline:
            print(f"[WARN] {iface} 在 {max_wait} 秒内仍无法连通 {', '.join(targets)}")
            return False

        attempt += 1
        time.sleep(retry_interval)
|
||||||
|
|
||||||
|
|
||||||
|
def ping_via_interface(iface, targets=DEFAULT_PUBLIC_TARGETS):
    """Compatibility wrapper kept for existing call sites.

    Runs the full connectivity check and returns its boolean result.
    """
    reachable = verify_connectivity(iface, targets=targets)
    return reachable
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command line of the RNDIS auto-dial script.

    Returns:
        ``argparse.Namespace`` with ``serial_port``, ``interface``,
        ``modem_subnet``, ``gateway``, ``skip_dhcp``,
        ``remove_default_route`` and ``route_target`` (list).

    ``--modem-subnet`` is validated here so that a typo is rejected before
    the modem is reconfigured and rebooted; the value itself is passed
    through unchanged.
    """

    def parse_ipv4_subnet(value):
        """argparse ``type=`` callback: accept only a valid IPv4 network."""
        try:
            ipaddress.IPv4Network(value, strict=False)
        except ValueError as exc:
            raise argparse.ArgumentTypeError(f"无效的 IPv4 网段: {value}") from exc
        return value

    parser = argparse.ArgumentParser(description="RM520N-GL RNDIS 自动拨号脚本")
    parser.add_argument(
        "--serial-port",
        default=DEFAULT_SERIAL_PORT,
        help=f"AT 串口路径,默认 {DEFAULT_SERIAL_PORT}",
    )
    parser.add_argument(
        "--interface",
        help="指定期望的 5G 网卡名,例如 eth0",
    )
    parser.add_argument(
        "--modem-subnet",
        type=parse_ipv4_subnet,
        default=DEFAULT_MODEM_SUBNET,
        help=f"拨号成功后用于识别模组接口的 IPv4 网段,默认 {DEFAULT_MODEM_SUBNET}",
    )
    parser.add_argument(
        "--gateway",
        type=parse_ipv4_address,
        default=DEFAULT_MODEM_GATEWAY,
        help=f"5G 模组网关地址,默认 {DEFAULT_MODEM_GATEWAY}",
    )
    parser.add_argument(
        "--skip-dhcp",
        action="store_true",
        help="只等待 USB 网卡出现,不主动申请 IPv4",
    )
    parser.add_argument(
        "--remove-default-route",
        action="store_true",
        help="拨号成功后删除 5G 接口上的默认路由,只保留显式主机路由",
    )
    parser.add_argument(
        "--route-target",
        action="append",
        default=[],
        type=parse_ipv4_address,
        help="拨号完成后通过 5G 接口保留的 IPv4 主机路由目标,可重复传入",
    )
    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point: detect the modem, dial via AT commands, verify the link.

    Exits with status 1 on any failure; returns normally once the modem
    interface holds an IPv4 address (the connectivity check result is
    reported but does not change the exit status).
    """
    args = parse_args()
    require_root()
    require_commands()

    print("===== RM520N-GL RNDIS 自动拨号 =====")
    print(f"[INFO] 目标模组网段: {args.modem_subnet}")

    # 1. Check lsusb to confirm the module has been recognized.
    present, detail = usb_device_present()
    if not present:
        print(f"[FAIL] 未检测到模块 USB 设备 {USB_ID}")
        if detail:
            print(detail)
        sys.exit(1)

    print(f"[OK] 检测到 USB 设备: {detail}")
    print(f"[INFO] 使用 AT 口: {args.serial_port}")

    # 2. Dial through the serial port from Python.
    try:
        configure_rndis(args.serial_port)

        print("[INFO] 已发送 AT+CFUN=1,1,等待模块重启")
        disappeared, _ = wait_for_usb_device(expected_present=False, timeout=25)
        print(
            "[OK] 模块已下线,继续等待重新枚举"
            if disappeared
            else "[WARN] 未观察到模块下线,继续等待重新枚举"
        )

        reappeared, detail = wait_for_usb_device(expected_present=True, timeout=90)
        if not reappeared:
            print(f"[FAIL] 模块重启后未重新枚举: {USB_ID}")
            sys.exit(1)

        print(f"[OK] 模块已重新枚举: {detail}")

        candidates = wait_for_usb_candidates(explicit_iface=args.interface, timeout=90)
        if not candidates:
            print("[FAIL] 未检测到 5G 模组枚举出的 USB 网卡")
            sys.exit(1)

        if args.skip_dhcp:
            # Only look for an interface that already has a matching address.
            print(f"[INFO] 当前 USB 网卡候选项: {', '.join(candidates)}")
            iface, ipv4_addrs = find_interface_by_subnet(
                args.modem_subnet,
                explicit_iface=args.interface,
            )
            if not iface:
                print(f"[WARN] 当前还没有接口拿到目标网段 {args.modem_subnet} 的地址")
                sys.exit(1)
        else:
            iface, ipv4_addrs = acquire_modem_interface(
                args.modem_subnet,
                explicit_iface=args.interface,
            )
            if not iface:
                print(f"[FAIL] 未找到落在目标网段 {args.modem_subnet} 内的模组接口")
                sys.exit(1)

        print_interface_status(iface)

        if not ipv4_addrs:
            print(f"[WARN] {iface} 已出现,但还没有 IPv4 地址")
            print(f"[INFO] 可手动检查: ip addr show {iface}")
            sys.exit(1)

        for addr in ipv4_addrs:
            print(f"[OK] {iface} 已获取 IPv4: {addr}")
        save_interface_info(iface)

        route_targets = dedupe_keep_order(args.route_target)
        if args.remove_default_route:
            enforce_route_policy(iface, args.gateway, route_targets)

        # Probe the explicit route targets when given, else the defaults.
        connectivity_targets = route_targets or list(DEFAULT_PUBLIC_TARGETS)
        ping_via_interface(iface, targets=connectivity_targets)
        print(f"[DONE] RNDIS 拨号完成,可执行: sudo python3 speed_test.py {iface}")
    except (RuntimeError, subprocess.TimeoutExpired) as exc:
        print(f"[FAIL] {exc}")
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
60
scripts/boot/robot-boot.env
Normal file
60
scripts/boot/robot-boot.env
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# Boot-time settings for the robot-side autostart chain.
|
||||||
|
# Override machine-specific values in robot-boot.env.local.
|
||||||
|
|
||||||
|
BLITZ_BOOT_DELAY_SEC="30"
|
||||||
|
BLITZ_RUN_ROOT="/var/log/blitz-robot"
|
||||||
|
BLITZ_LOG_FILE="/var/log/blitz-robot/startup.log"
|
||||||
|
BLITZ_RUNTIME_DIR="/run/blitz-robot"
|
||||||
|
BLITZ_RUN_CONTEXT_FILE="${BLITZ_RUNTIME_DIR}/run-context.env"
|
||||||
|
BLITZ_RUN_ID_FILE="${BLITZ_RUNTIME_DIR}/run-id"
|
||||||
|
BLITZ_CURRENT_RUN_LINK="${BLITZ_RUN_ROOT}/current"
|
||||||
|
|
||||||
|
BLITZ_5G_DIAL_DIR="${OMNISOCKETGO_ROOT}/scripts/boot"
|
||||||
|
BLITZ_5G_SERIAL_PORT="/dev/ttyUSB2"
|
||||||
|
BLITZ_5G_INTERFACE=""
|
||||||
|
BLITZ_5G_MODEM_SUBNET="192.168.224.0/22"
|
||||||
|
BLITZ_5G_GATEWAY="192.168.225.1"
|
||||||
|
BLITZ_5G_SKIP_DHCP="0"
|
||||||
|
BLITZ_5G_REMOVE_DEFAULT_ROUTE="1"
|
||||||
|
BLITZ_5G_ROUTE_TARGETS="106.55.173.235"
|
||||||
|
BLITZ_5G_INFO_JSON="${OMNISOCKETGO_ROOT}/scripts/boot/modem_network_info.json"
|
||||||
|
BLITZ_5G_SERIAL_WAIT_SEC="60"
|
||||||
|
BLITZ_5G_ROUTE_WAIT_SEC="30"
|
||||||
|
|
||||||
|
# Leave empty to fall back to the host part of ROBOT_SIDE_OMNISOCKET_SERVER_ADDR.
|
||||||
|
BLITZ_TIME_SERVER_IP="81.70.156.140"
|
||||||
|
|
||||||
|
BLITZ_ROS_USER="nvidia"
|
||||||
|
BLITZ_ROS_SOCKET_WAIT_SEC="20"
|
||||||
|
BLITZ_WATCHDOG_INTERVAL_SEC="5"
|
||||||
|
BLITZ_HEALTH_STALE_SEC="15"
|
||||||
|
BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="15"
|
||||||
|
BLITZ_KCP_STATS_INTERVAL_MS="1000"
|
||||||
|
BLITZ_CONTROL_LATENCY_LOG_ENABLED="1"
|
||||||
|
BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD="100"
|
||||||
|
BLITZ_CONTROL_ACK_SAMPLE_MOD="10"
|
||||||
|
BLITZ_VIDEO_STAGE_LOG_ENABLED="1"
|
||||||
|
BLITZ_VIDEO_STAGE_LOG_SAMPLE_MOD="10"
|
||||||
|
BLITZ_5G_LINK_LOG_INTERVAL_SEC="5"
|
||||||
|
BLITZ_JSONL_FLUSH_INTERVAL_MS="1000"
|
||||||
|
BLITZ_JSONL_FLUSH_BYTES="262144"
|
||||||
|
BLITZ_JSONL_ROTATE_BYTES="134217728"
|
||||||
|
BLITZ_JSONL_ROTATE_FILES="8"
|
||||||
|
# Log one normal relay packet out of every N packets. Drop events still log immediately.
|
||||||
|
OMNI_RELAY_PACKET_LOG_SAMPLE_EVERY="200"
|
||||||
|
BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC="5"
|
||||||
|
BLITZ_INCIDENT_TOTAL_TIMEOUT_SEC="30"
|
||||||
|
BLITZ_NETWORK_FAIL_THRESHOLD="3"
|
||||||
|
BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="30"
|
||||||
|
BLITZ_GPS_MONITOR_ENABLED="1"
|
||||||
|
BLITZ_GPS_DEVICE_GLOB="/dev/ttyCH341USB*"
|
||||||
|
BLITZ_GPS_CHECK_INTERVAL_SEC="10"
|
||||||
|
BLITZ_GPS_RESTART_UNITS="gpsd.socket gpsd.service"
|
||||||
|
BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION="0"
|
||||||
|
|
||||||
|
OMNI_CAMERA_DEVICE="/dev/v4l/by-path/platform-a80aa10000.usb-usb-0:3.2:1.4-video-index0"
|
||||||
|
|
||||||
|
# Boot units run b_side_omnid as root directly, so nested sudo must stay off.
|
||||||
|
B_SIDE_OMNID_USE_SUDO="0"
|
||||||
|
OMNI_CONTROL_ACK_PEER_ID="peer-b-ctrl-ack"
|
||||||
|
OMNI_CONTROL_ACK_TARGET_PEER="peer-a-ctrl-ack"
|
||||||
18
scripts/boot/start-5g-link-logger-service.sh
Normal file
18
scripts/boot/start-5g-link-logger-service.sh
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="5g-link-logger-service"
|
||||||
|
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_require_run_context
|
||||||
|
|
||||||
|
export OMNI_BOOT_MODE="1"
|
||||||
|
export BLITZ_INSTANCE_ID="${BLITZ_INSTANCE_ID:-$(blitz_new_instance_id)}"
|
||||||
|
export BLITZ_5G_LINK_LOG_PATH="${BLITZ_5G_LINK_LOG_PATH:-${BLITZ_RUN_DIR}/b-5g-link-quality.${BLITZ_INSTANCE_ID}.jsonl}"
|
||||||
|
|
||||||
|
blitz_log "${STEP}" "start" "start" "exec bash ${OMNISOCKETGO_ROOT}/scripts/boot/blitz-5g-link-logger.sh" 0
|
||||||
|
exec bash "${OMNISOCKETGO_ROOT}/scripts/boot/blitz-5g-link-logger.sh"
|
||||||
19
scripts/boot/start-b-side-omnid-service.sh
Normal file
19
scripts/boot/start-b-side-omnid-service.sh
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/common.sh"
|
||||||
|
|
||||||
|
STEP="b-side-omnid"
|
||||||
|
|
||||||
|
blitz_load_boot_env
|
||||||
|
blitz_require_run_context
|
||||||
|
|
||||||
|
blitz_require_executable "${OMNISOCKETGO_ROOT}/bin/b_side_omnid" "${STEP}"
|
||||||
|
|
||||||
|
export OMNI_BOOT_MODE="1"
|
||||||
|
export B_SIDE_OMNID_USE_SUDO="0"
|
||||||
|
|
||||||
|
blitz_log "${STEP}" "start" "start" "exec bash ${OMNISOCKETGO_ROOT}/scripts/dev/start-b-side-omnid.sh" 0
|
||||||
|
exec bash "${OMNISOCKETGO_ROOT}/scripts/dev/start-b-side-omnid.sh"
|
||||||
18
scripts/boot/start-ros-receiver-service.sh
Normal file
18
scripts/boot/start-ros-receiver-service.sh
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
# Boot-time entry point for the ROS receiver.
#
# Loads the boot environment through the helpers sourced from common.sh,
# checks that both ROS setup files exist, then replaces this process with the
# regular dev launcher (scripts/dev/start-ros-receiver.sh).
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
. "${SCRIPT_DIR}/common.sh"

# Step label handed to the blitz_* helpers for their log records.
STEP="ros-receiver"

blitz_load_boot_env
blitz_require_run_context

# Distro-wide ROS environment first, then the workspace overlay.
blitz_require_file "/opt/ros/${ROS_DISTRO}/setup.bash" "${STEP}"
blitz_require_file "${ROS_CONTROL_PY_DIR}/install/setup.bash" "${STEP}"

# Consumed further down the chain (not visible in this script).
export OMNI_BOOT_MODE="1"

blitz_log "${STEP}" "start" "start" "exec bash ${OMNISOCKETGO_ROOT}/scripts/dev/start-ros-receiver.sh" 0
# exec keeps the launcher under the same PID as this script.
exec bash "${OMNISOCKETGO_ROOT}/scripts/dev/start-ros-receiver.sh"
|
||||||
15
scripts/boot/systemd/blitz-5g-dial.service.in
Normal file
15
scripts/boot/systemd/blitz-5g-dial.service.in
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# One-shot unit that runs scripts/boot/5g-dial.sh as part of blitz-robot.target.
# @OMNISOCKETGO_ROOT@ and @BLITZ_LOG_FILE@ are template placeholders
# (presumably substituted when the unit file is installed).
[Unit]
Description=Blitz robot 5G dial
# Hard dependency on the run context, and ordered after it.
Requires=blitz-run-context.service
After=blitz-run-context.service
# Stop/restart requests on the target propagate to this unit.
PartOf=blitz-robot.target

[Service]
# oneshot + RemainAfterExit: the unit stays active once the script has exited
# successfully, so units ordered After= it wait for the script to finish.
Type=oneshot
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/5g-dial.sh
RemainAfterExit=yes
# Both streams are appended to the shared log file.
StandardOutput=append:@BLITZ_LOG_FILE@
StandardError=append:@BLITZ_LOG_FILE@

[Install]
WantedBy=blitz-robot.target
|
||||||
19
scripts/boot/systemd/blitz-5g-link-logger.service.in
Normal file
19
scripts/boot/systemd/blitz-5g-link-logger.service.in
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# Long-running unit that runs scripts/boot/start-5g-link-logger-service.sh.
# @OMNISOCKETGO_ROOT@ and @BLITZ_LOG_FILE@ are template placeholders
# (presumably substituted when the unit file is installed).
[Unit]
Description=Blitz robot 5G link logger
PartOf=blitz-robot.target
# The run context is a hard requirement; the 5G dial unit is only wanted, so a
# failed dial does not prevent this unit from starting.
Requires=blitz-run-context.service
Wants=blitz-run-context.service
Wants=blitz-5g-dial.service
After=blitz-run-context.service
After=blitz-5g-dial.service

[Service]
Type=simple
# Leading "-": a missing environment file is not an error.
EnvironmentFile=-/run/blitz-robot/run-context.env
ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-5g-link-logger-service.sh
# Restarted 5 seconds after any exit.
Restart=always
RestartSec=5
StandardOutput=append:@BLITZ_LOG_FILE@
StandardError=append:@BLITZ_LOG_FILE@

[Install]
WantedBy=blitz-robot.target
|
||||||
20
scripts/boot/systemd/blitz-b-side-omnid.service.in
Normal file
20
scripts/boot/systemd/blitz-b-side-omnid.service.in
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
# Long-running unit for the robot-side omnid daemon, started through
# scripts/boot/start-b-side-omnid-service.sh.
# @OMNISOCKETGO_ROOT@ and @BLITZ_LOG_FILE@ are template placeholders
# (presumably substituted when the unit file is installed).
[Unit]
Description=Blitz robot b-side omnid
PartOf=blitz-robot.target
# The run context is a hard requirement; the 5G dial and ROS receiver units
# are wanted and ordered before this one, but their failure does not block it.
Requires=blitz-run-context.service
Wants=blitz-run-context.service
Wants=blitz-5g-dial.service
Wants=blitz-ros-receiver.service
After=blitz-run-context.service
After=blitz-5g-dial.service
After=blitz-ros-receiver.service

[Service]
Type=simple
# Leading "-": a missing environment file is not an error.
EnvironmentFile=-/run/blitz-robot/run-context.env
ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-b-side-omnid-service.sh
# Runs after every stop; launches the incident-capture script only when
# SERVICE_RESULT is something other than "success".
ExecStopPost=/bin/bash -lc 'if [[ "${SERVICE_RESULT:-success}" != "success" ]]; then exec /bin/bash "@OMNISOCKETGO_ROOT@/scripts/boot/blitz-incident-capture-launch.sh" --source exec-stop-post --unit "%n" --result "${SERVICE_RESULT:-}" --exit-status "${EXIT_STATUS:-}" --reason b-side-service-exit; fi'
# Restarted 2 seconds after any exit.
Restart=always
RestartSec=2
StandardOutput=append:@BLITZ_LOG_FILE@
StandardError=append:@BLITZ_LOG_FILE@

[Install]
WantedBy=blitz-robot.target
|
||||||
15
scripts/boot/systemd/blitz-boot-gate.service.in
Normal file
15
scripts/boot/systemd/blitz-boot-gate.service.in
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# One-shot unit that runs scripts/boot/boot-gate.sh; blitz-run-context.service
# requires it and is ordered after it.
# @OMNISOCKETGO_ROOT@ and @BLITZ_LOG_FILE@ are template placeholders
# (presumably substituted when the unit file is installed).
[Unit]
Description=Blitz robot boot gate
PartOf=blitz-robot.target
# Pull in network-online.target and wait for it and for multi-user.target.
Wants=network-online.target
After=multi-user.target
After=network-online.target

[Service]
# oneshot + RemainAfterExit: the unit stays active once the script has exited
# successfully, so units ordered After= it wait for the script to finish.
Type=oneshot
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/boot-gate.sh
RemainAfterExit=yes
StandardOutput=append:@BLITZ_LOG_FILE@
StandardError=append:@BLITZ_LOG_FILE@

[Install]
WantedBy=blitz-robot.target
|
||||||
13
scripts/boot/systemd/blitz-robot.target.in
Normal file
13
scripts/boot/systemd/blitz-robot.target.in
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Umbrella target for the robot boot chain. Starting it pulls in every blitz-*
# service listed below; the ordering between those services is declared in the
# individual unit files, not here.
[Unit]
Description=Blitz robot boot chain
# Wants= (not Requires=): a failing member does not fail the target.
Wants=blitz-boot-gate.service blitz-run-context.service blitz-5g-dial.service blitz-5g-link-logger.service blitz-ros-receiver.service blitz-b-side-omnid.service blitz-watchdog.service
After=multi-user.target

[Install]
WantedBy=multi-user.target
|
||||||
23
scripts/boot/systemd/blitz-ros-receiver.service.in
Normal file
23
scripts/boot/systemd/blitz-ros-receiver.service.in
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Long-running unit for the ROS receiver, started through
# scripts/boot/start-ros-receiver-service.sh.
# @OMNISOCKETGO_ROOT@, @BLITZ_ROS_USER@ and @BLITZ_LOG_FILE@ are template
# placeholders (presumably substituted when the unit file is installed).
[Unit]
Description=Blitz robot ROS receiver
PartOf=blitz-robot.target
# The run context is a hard requirement; the 5G dial unit is only wanted.
Requires=blitz-run-context.service
Wants=blitz-run-context.service
Wants=blitz-5g-dial.service
After=blitz-run-context.service
After=blitz-5g-dial.service

[Service]
Type=simple
# With PermissionsStartOnly=true, User= applies to ExecStart= only; the
# ExecStartPre/ExecStartPost/ExecStopPost helpers keep full privileges.
User=@BLITZ_ROS_USER@
PermissionsStartOnly=true
# Leading "-": a missing environment file is not an error.
EnvironmentFile=-/run/blitz-robot/run-context.env
ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-ros-receiver-service.sh
# Start-up is not considered complete until the receiver's Unix socket exists
# (or the wait script times out and fails the start).
ExecStartPost=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/wait-for-unix-socket.sh --step ros-receiver
# Runs after every stop; launches the incident-capture script only when
# SERVICE_RESULT is something other than "success".
ExecStopPost=/bin/bash -lc 'if [[ "${SERVICE_RESULT:-success}" != "success" ]]; then exec /bin/bash "@OMNISOCKETGO_ROOT@/scripts/boot/blitz-incident-capture-launch.sh" --source exec-stop-post --unit "%n" --result "${SERVICE_RESULT:-}" --exit-status "${EXIT_STATUS:-}" --reason ros-service-exit; fi'
# Restarted 2 seconds after any exit.
Restart=always
RestartSec=2
StandardOutput=append:@BLITZ_LOG_FILE@
StandardError=append:@BLITZ_LOG_FILE@

[Install]
WantedBy=blitz-robot.target
|
||||||
15
scripts/boot/systemd/blitz-run-context.service.in
Normal file
15
scripts/boot/systemd/blitz-run-context.service.in
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# One-shot unit that runs scripts/boot/blitz-run-context.sh. The other blitz-*
# services declare Requires=/After= on this unit.
# @OMNISOCKETGO_ROOT@ and @BLITZ_LOG_FILE@ are template placeholders
# (presumably substituted when the unit file is installed).
[Unit]
Description=Blitz robot run context
# Hard dependency on the boot gate, and ordered after it.
Requires=blitz-boot-gate.service
After=blitz-boot-gate.service
PartOf=blitz-robot.target

[Service]
# oneshot + RemainAfterExit: the unit stays active once the script has exited
# successfully, so units ordered After= it wait for the script to finish.
Type=oneshot
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/blitz-run-context.sh
RemainAfterExit=yes
StandardOutput=append:@BLITZ_LOG_FILE@
StandardError=append:@BLITZ_LOG_FILE@

[Install]
WantedBy=blitz-robot.target
|
||||||
19
scripts/boot/systemd/blitz-watchdog.service.in
Normal file
19
scripts/boot/systemd/blitz-watchdog.service.in
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# Long-running unit that runs scripts/boot/blitz-watchdog.sh.
# @OMNISOCKETGO_ROOT@ and @BLITZ_LOG_FILE@ are template placeholders
# (presumably substituted when the unit file is installed).
[Unit]
Description=Blitz robot health watchdog
PartOf=blitz-robot.target
# The run context is a hard requirement; the omnid and ROS receiver units are
# wanted and ordered before this one, but their failure does not block it.
Requires=blitz-run-context.service
Wants=blitz-run-context.service
Wants=blitz-b-side-omnid.service
Wants=blitz-ros-receiver.service
After=blitz-run-context.service
After=blitz-b-side-omnid.service
After=blitz-ros-receiver.service

[Service]
Type=simple
# Leading "-": a missing environment file is not an error.
EnvironmentFile=-/run/blitz-robot/run-context.env
ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/blitz-watchdog.sh
# Restarted 5 seconds after any exit.
Restart=always
RestartSec=5
StandardOutput=append:@BLITZ_LOG_FILE@
StandardError=append:@BLITZ_LOG_FILE@

[Install]
WantedBy=blitz-robot.target
|
||||||
49
scripts/boot/wait-for-unix-socket.sh
Normal file
49
scripts/boot/wait-for-unix-socket.sh
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env bash
# Wait until a Unix-domain socket exists at a given path, or fail on timeout.
#
# Usage: wait-for-unix-socket.sh [--path SOCKET] [--timeout SECONDS] [--step NAME]
#   --path     socket to wait for   (default: $ROBOT_RECEIVER_LOCAL_SOCKET_PATH)
#   --timeout  whole seconds to wait (default: $BLITZ_ROS_SOCKET_WAIT_SEC)
#   --step     step label used in blitz_log records (default: ros-receiver)
#
# Exit status: 0 socket present, 1 timed out, 2 bad arguments or configuration.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/common.sh"

STEP="ros-receiver"
SOCKET_PATH=""
TIMEOUT_SEC=""

# Log an argument/configuration error through blitz_log and exit with status 2.
fail_usage() {
  blitz_log "${STEP}" "wait-socket-arg" "failure" "$1" 2
  exit 2
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --path|--timeout|--step)
      # Under `set -u`, reading a missing "$2" would abort the script before
      # anything is logged, so check for the value explicitly.
      if [[ $# -lt 2 ]]; then
        fail_usage "missing value for $1"
      fi
      case "$1" in
        --path) SOCKET_PATH="$2" ;;
        --timeout) TIMEOUT_SEC="$2" ;;
        --step) STEP="$2" ;;
      esac
      shift 2
      ;;
    *)
      fail_usage "unknown argument: $1"
      ;;
  esac
done

blitz_load_boot_env

# Command-line values win; otherwise fall back to the boot environment. The
# inner ":-" keeps an unset default from tripping `set -u` before we can log.
SOCKET_PATH="${SOCKET_PATH:-${ROBOT_RECEIVER_LOCAL_SOCKET_PATH:-}}"
TIMEOUT_SEC="${TIMEOUT_SEC:-${BLITZ_ROS_SOCKET_WAIT_SEC:-}}"

if [[ -z "${SOCKET_PATH}" ]]; then
  fail_usage "socket path is empty (pass --path or set ROBOT_RECEIVER_LOCAL_SOCKET_PATH)"
fi
if [[ ! "${TIMEOUT_SEC}" =~ ^[0-9]+$ ]]; then
  fail_usage "timeout must be a non-negative whole number of seconds, got: ${TIMEOUT_SEC}"
fi
# Force base 10 so a value such as "08" is not rejected as invalid octal.
TIMEOUT_SEC=$(( 10#${TIMEOUT_SEC} ))

blitz_log "${STEP}" "wait-socket" "start" "path=${SOCKET_PATH} timeout_sec=${TIMEOUT_SEC}" 0

# Check first, then sleep: the socket is tested at 0, 1, ..., TIMEOUT_SEC
# seconds, so a socket that appears during the final second is still seen and
# a timeout of 0 performs exactly one check.
waited=0
while true; do
  if [[ -S "${SOCKET_PATH}" ]]; then
    blitz_log "${STEP}" "wait-socket" "success" "path=${SOCKET_PATH} waited_sec=${waited}" 0
    exit 0
  fi
  if (( waited >= TIMEOUT_SEC )); then
    break
  fi
  sleep 1
  waited=$(( waited + 1 ))
done

blitz_log "${STEP}" "wait-socket" "failure" "path=${SOCKET_PATH} timeout_sec=${TIMEOUT_SEC}" 1
exit 1
|
||||||
@@ -17,16 +17,19 @@ The scripts assume:
|
|||||||
- `robot-command-center` is a sibling directory next to it
|
- `robot-command-center` is a sibling directory next to it
|
||||||
|
|
||||||
If your `robot-command-center` is elsewhere, set `ROBOT_COMMAND_CENTER_ROOT` in `robot-remote.env.local`.
|
If your `robot-command-center` is elsewhere, set `ROBOT_COMMAND_CENTER_ROOT` in `robot-remote.env.local`.
|
||||||
|
`start-backend.sh` and `start-frontend.sh` need that repo; `start-ros-receiver.sh` and `start-b-side-omnid.sh` do not.
|
||||||
|
|
||||||
## Files
|
## Files
|
||||||
|
|
||||||
- `robot-remote.env`: shared defaults for backend, frontend, ROS, and `b_side_omnid`
|
- `robot-remote.env`: shared defaults for backend, frontend, ROS, and `b_side_omnid`
|
||||||
- `robot-remote.env.local`: optional local override file loaded after `robot-remote.env`
|
- `robot-remote.env.local`: optional local override file loaded after `robot-remote.env`
|
||||||
- `load-env.sh`: loads the shared environment into the current shell
|
- `load-env.sh`: loads the shared environment into the current shell
|
||||||
|
- `apply-camera-controls.sh`: applies the camera preset before `b_side_omnid` starts
|
||||||
- `start-backend.sh`: starts Django ASGI with `uvicorn`
|
- `start-backend.sh`: starts Django ASGI with `uvicorn`
|
||||||
|
- `log-network-summary.py`: polls the backend `network/latest` API and appends compact JSONL snapshots
|
||||||
- `start-frontend.sh`: starts the Vite dev server
|
- `start-frontend.sh`: starts the Vite dev server
|
||||||
- `start-ros-receiver.sh`: starts the ROS2 `udp_teleop_bridge` receiver
|
- `start-ros-receiver.sh`: starts the ROS2 `udp_teleop_bridge` receiver
|
||||||
- `start-b-side-omnid.sh`: starts `./bin/b_side_omnid` and uses `sudo -E` by default
|
- `start-b-side-omnid.sh`: applies camera controls, then starts `./bin/b_side_omnid` and uses `sudo -E` by default
|
||||||
- `start-dev-tmux.sh`: optional one-command `tmux` launcher for all four processes
|
- `start-dev-tmux.sh`: optional one-command `tmux` launcher for all four processes
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
@@ -52,6 +55,11 @@ If you only want the shared environment for manual commands:
|
|||||||
source scripts/dev/load-env.sh
|
source scripts/dev/load-env.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
|
When you launch via `start-*.sh`, you do not need to manually `export` the variables from
|
||||||
|
`robot-remote.env` or `robot-remote.env.local`. `load-env.sh` loads those files with `set -a`,
|
||||||
|
so the variables are exported automatically for the child process. Manual `export` is only needed
|
||||||
|
if you bypass these scripts and start binaries directly from a clean shell.
|
||||||
|
|
||||||
## Customizing
|
## Customizing
|
||||||
|
|
||||||
Edit `scripts/dev/robot-remote.env` for shared changes such as:
|
Edit `scripts/dev/robot-remote.env` for shared changes such as:
|
||||||
@@ -63,19 +71,86 @@ Edit `scripts/dev/robot-remote.env` for shared changes such as:
|
|||||||
- `ROBOT_SIDE_OMNISOCKET_RELAY_VIA`
|
- `ROBOT_SIDE_OMNISOCKET_RELAY_VIA`
|
||||||
- `VITE_API_BASE_URL`
|
- `VITE_API_BASE_URL`
|
||||||
- `OMNI_CAMERA_DEVICE`
|
- `OMNI_CAMERA_DEVICE`
|
||||||
|
- `OMNI_CAMERA_PROFILE`
|
||||||
|
- `OMNI_CAMERA_BRIGHTNESS`
|
||||||
|
- `OMNI_CAMERA_CUSTOM_CTRL`
|
||||||
|
- `OMNI_CAMERA_VERIFY`
|
||||||
- `OMNI_VIDEO_PEER_ID`
|
- `OMNI_VIDEO_PEER_ID`
|
||||||
- `OMNI_CONTROL_PEER_ID`
|
- `OMNI_CONTROL_PEER_ID`
|
||||||
|
- `OMNI_VIDEO_SOFT_BACKPRESSURE_SEGMENTS`
|
||||||
|
- `OMNI_VIDEO_HARD_BACKPRESSURE_SEGMENTS`
|
||||||
|
- `OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS`
|
||||||
|
- `OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS`
|
||||||
|
- `OMNI_VIDEO_MAX_FRAME_AGE_MS`
|
||||||
|
- `OMNISOCKET_TELEMETRY_PEER_ID`
|
||||||
|
- `OMNISOCKET_TELEMETRY_INTERVAL_MS`
|
||||||
|
- `OMNISOCKET_TELEMETRY_STALE_AFTER_MS`
|
||||||
|
- `OMNI_NETWORK_SUMMARY_LOG_ENABLED`
|
||||||
|
- `OMNI_NETWORK_SUMMARY_LOG_PATH`
|
||||||
|
- `OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS`
|
||||||
|
|
||||||
|
Camera presets use `v4l2-ctl` from `v4l-utils` on the robot side.
|
||||||
|
|
||||||
Role mapping:
|
Role mapping:
|
||||||
|
|
||||||
- `start-backend.sh` uses the `CONTROL_SIDE_*` address pair
|
- `start-backend.sh` uses the `CONTROL_SIDE_*` address pair
|
||||||
- `start-b-side-omnid.sh` uses the `ROBOT_SIDE_*` address pair
|
- `start-b-side-omnid.sh` uses the `ROBOT_SIDE_*` address pair
|
||||||
|
- `start-b-side-omnid.sh` also applies the `OMNI_CAMERA_*` preset before the daemon opens the camera
|
||||||
- `start-ros-receiver.sh` defaults to the robot-side address pair, but with `transport=unix_dgram` it usually does not need the server address
|
- `start-ros-receiver.sh` defaults to the robot-side address pair, but with `transport=unix_dgram` it usually does not need the server address
|
||||||
|
|
||||||
|
Repair and tuning knobs, and which process reads each one:
|
||||||
|
|
||||||
|
- `OMNI_VIDEO_SOFT_BACKPRESSURE_SEGMENTS`, `OMNI_VIDEO_HARD_BACKPRESSURE_SEGMENTS`, and `OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS` are used by `b_side_omnid`
|
||||||
|
- `OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS` is used by `b_side_omnid`
|
||||||
|
- `OMNI_VIDEO_MAX_FRAME_AGE_MS` is used by `start-backend.sh` on the A-side backend, not by `b_side_omnid`
|
||||||
|
- `OMNISOCKET_TELEMETRY_INTERVAL_MS` and `OMNISOCKET_TELEMETRY_STALE_AFTER_MS` tune the backend's D-side telemetry freshness window
|
||||||
|
- `OMNI_NETWORK_SUMMARY_LOG_*` controls the A-side JSONL summary logger that polls `GET /api/network/latest/`
|
||||||
|
|
||||||
|
Default long-run network logging:
|
||||||
|
|
||||||
|
- A-side starts a compact JSONL logger by default at `${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl`
|
||||||
|
- The default A-side polling interval is `2000 ms`
|
||||||
|
- For D-side long runs, prefer:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./bin/kcpserver -listen 0.0.0.0:10909 \
|
||||||
|
-telemetry-peer peer-a-telemetry \
|
||||||
|
-telemetry-interval 1000ms \
|
||||||
|
-kcp-session-stats-log logs/d-kcp-stats.jsonl \
|
||||||
|
-kcp-session-stats-interval 1000ms
|
||||||
|
```
|
||||||
|
|
||||||
|
- Keep `-latency-log` and `-kcp-ts-debug-log` off by default for multi-hour runs
|
||||||
|
- Do not continuously redirect relay `C` stderr to a file unless you are reproducing a short issue window
|
||||||
|
|
||||||
Put machine-specific overrides into `scripts/dev/robot-remote.env.local`. Example:
|
Put machine-specific overrides into `scripts/dev/robot-remote.env.local`. Example:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ROBOT_COMMAND_CENTER_ROOT="$HOME/Documents/robot-command-center"
|
ROBOT_COMMAND_CENTER_ROOT="$HOME/Documents/robot-command-center"
|
||||||
OMNI_CAMERA_DEVICE="/dev/video30"
|
OMNI_CAMERA_DEVICE="/dev/video30"
|
||||||
B_SIDE_OMNID_USE_SUDO="0"
|
B_SIDE_OMNID_USE_SUDO="0"
|
||||||
|
OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS="5000"
|
||||||
|
```
|
||||||
|
|
||||||
|
Default camera behavior is the `night` preset:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
OMNI_CAMERA_PROFILE="night"
|
||||||
|
# Optional per-machine tweak:
|
||||||
|
OMNI_CAMERA_BRIGHTNESS="8"
|
||||||
|
```
|
||||||
|
|
||||||
|
To switch to a daytime preset with brightness only:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
OMNI_CAMERA_PROFILE="day"
|
||||||
|
OMNI_CAMERA_BRIGHTNESS="8"
|
||||||
|
```
|
||||||
|
|
||||||
|
To send the raw `v4l2-ctl --set-ctrl=...` payload yourself:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
OMNI_CAMERA_PROFILE="custom"
|
||||||
|
OMNI_CAMERA_CUSTOM_CTRL="brightness=8,auto_exposure=1,exposure_time_absolute=800,gain=64"
|
||||||
|
OMNI_CAMERA_VERIFY="1"
|
||||||
```
|
```
|
||||||
|
|||||||
292
scripts/dev/aggregate-latency-estimates.py
Normal file
292
scripts/dev/aggregate-latency-estimates.py
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
import html
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse the command line of the latency aggregation tool.

    Returns:
        Namespace with ``run_dir`` (required) and ``output_dir`` (``None``
        when ``--output-dir`` is not given).
    """
    cli = argparse.ArgumentParser(
        description="Aggregate run logs into control/video latency estimate outputs.",
    )
    cli.add_argument("--run-dir", required=True, help="Run directory containing JSONL logs.")
    cli.add_argument("--output-dir", help="Output directory. Defaults to --run-dir.")
    namespace = cli.parse_args()
    return namespace
|
||||||
|
|
||||||
|
|
||||||
|
def iter_jsonl(path: Path) -> list[dict[str, Any]]:
    """Read a JSONL file and return the JSON objects it contains.

    A missing file yields an empty list. Blank lines, lines that are not valid
    JSON, and JSON values that are not objects are skipped without error.
    """
    if not path.exists():
        return []

    def decode(text: str) -> Any:
        # None doubles as the "unparseable" marker; it is filtered out below
        # together with every other non-dict value.
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            return None

    with path.open("r", encoding="utf-8") as handle:
        stripped = (raw_line.strip() for raw_line in handle)
        decoded = (decode(text) for text in stripped if text)
        return [item for item in decoded if isinstance(item, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def load_glob_jsonl(run_dir: Path, pattern: str) -> list[dict[str, Any]]:
    """Load every JSONL file in ``run_dir`` matching ``pattern`` into one list.

    Files are read in sorted path order and their records are concatenated in
    that order.
    """
    return [
        record
        for log_path in sorted(run_dir.glob(pattern))
        for record in iter_jsonl(log_path)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def write_jsonl(path: Path, records: list[dict[str, Any]]) -> None:
    """Write ``records`` to ``path`` as compact JSON, one record per line.

    The file is overwritten. Non-ASCII characters are written as-is (UTF-8)
    rather than as ``\\u`` escapes.
    """
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record, ensure_ascii=False, separators=(",", ":")) + "\n"
            for record in records
        )
|
||||||
|
|
||||||
|
|
||||||
|
def parse_unix_ms(value: Any) -> int | None:
    """Convert a timestamp field to integer Unix milliseconds.

    Args:
        value: Either a number, which is taken to already be in milliseconds
            and is truncated to ``int``, or an ISO-8601 string. A trailing
            ``Z`` is accepted as UTC.

    Returns:
        Milliseconds since the Unix epoch, or ``None`` when the value is
        missing, empty, a boolean, a non-finite number, or not parseable.
    """
    # bool is a subclass of int; a JSON true/false is never a timestamp.
    if value is None or isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        try:
            return int(value)
        except (ValueError, OverflowError):
            # NaN and +/-Infinity are accepted by json.loads but have no
            # integer form; treat them as "no timestamp" instead of crashing.
            return None
    text = str(value).strip()
    if not text:
        return None
    # datetime.fromisoformat() only understands a "Z" suffix from Python 3.11 on.
    if text.endswith("Z"):
        text = f"{text[:-1]}+00:00"
    try:
        # NOTE(review): a string without a UTC offset is interpreted in the
        # host's local timezone by astimezone() -- confirm that is intended.
        return int(datetime.fromisoformat(text).astimezone(timezone.utc).timestamp() * 1000)
    except (ValueError, OverflowError, OSError):
        # OverflowError/OSError: naive datetimes outside the platform's range.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def flatten_net_epoch(samples: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Flatten nested network-summary samples into one flat row per sample.

    Each sample is read as ``links -> {a_to_d, d_to_b} -> sessions ->
    {control, video} -> {kcp, trend}``. Any level that is missing or falsy is
    treated as empty, so the corresponding columns come out as ``None``.
    Column names follow ``<hop>_<channel>_<metric>``.
    """

    def section(mapping: dict[str, Any], key: str) -> dict[str, Any]:
        return mapping.get(key) or {}

    hops = ("a_to_d", "d_to_b")
    channels = ("control", "video")

    rows: list[dict[str, Any]] = []
    for sample in samples:
        links = section(sample, "links")
        sessions = {hop: section(section(links, hop), "sessions") for hop in hops}
        kcp = {
            (hop, channel): section(section(sessions[hop], channel), "kcp")
            for hop in hops
            for channel in channels
        }

        # The insertion order below is the column order of the JSONL output.
        row: dict[str, Any] = {"updated_at": sample.get("updated_at")}
        for channel in channels:
            for hop in hops:
                for metric in ("srtt_ms", "min_srtt_ms"):
                    row[f"{hop}_{channel}_{metric}"] = kcp[hop, channel].get(metric)
        for channel in channels:
            for hop in hops:
                row[f"{hop}_{channel}_feedback_age_ms"] = kcp[hop, channel].get("last_feedback_age_ms")
        for channel in channels:
            for hop in hops:
                trend = section(section(sessions[hop], channel), "trend")
                row[f"{hop}_{channel}_retrans_delta"] = trend.get("retrans_delta")
        for hop in hops:
            row[f"{hop}_video_window_pressure_pct"] = kcp[hop, "video"].get("window_pressure_pct")
        row["robot_health"] = sample.get("robot_health")
        rows.append(row)
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_control_estimates(
    network_samples: list[dict[str, Any]],
    control_events: list[dict[str, Any]],
    control_acks: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Build the control-latency rows.

    When any ack records exist they are returned unchanged. Otherwise one
    ``srtt_fallback`` row is produced per network sample, copied from that
    sample's ``latency_estimate`` object, with ``source_event_count`` set to
    the number of control events.
    """
    if control_acks:
        return control_acks

    copied_fields = (
        "control_loop_rtt_ms",
        "control_to_persist_est_ms",
        "control_oneway_srtt_est_ms",
        "control_oneway_bestcase_est_ms",
    )

    def fallback_row(sample: dict[str, Any]) -> dict[str, Any]:
        estimate = sample.get("latency_estimate") or {}
        row: dict[str, Any] = {
            "updated_at": sample.get("updated_at"),
            "estimate_method": "srtt_fallback",
        }
        row.update((field, estimate.get(field)) for field in copied_fields)
        row["source_event_count"] = len(control_events)
        return row

    return [fallback_row(sample) for sample in network_samples]
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_video_estimates(
    network_samples: list[dict[str, Any]],
    frame_recv_records: list[dict[str, Any]],
    display_probe_records: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Build one video-latency estimate row per received frame.

    For every frame record that has a ``frame_seq``:

    * the network estimate comes from the latest network sample whose
      ``updated_at`` is at or before the frame's ``backend_received_unix_ns``;
    * ``video_partial_est_ms`` is ``b_side_capture_to_send_ms`` plus
      ``video_network_oneway_est_ms`` (a missing operand counts as 0; ``None``
      only when both are missing);
    * ``video_e2e_est_ms`` adds the display probe's request-to-paint time when
      a probe with the same ``frame_seq`` exists.

    Frame records may arrive in any time order: each one is matched against
    the sample timeline independently.

    Returns:
        A list of flat dict rows in the order of ``frame_recv_records``.
    """
    # Local import: only this function needs it.
    from bisect import bisect_right

    network_timeline = sorted(
        (
            (updated_at_ms, sample.get("latency_estimate") or {})
            for sample in network_samples
            for updated_at_ms in [parse_unix_ms(sample.get("updated_at"))]
            if updated_at_ms is not None
        ),
        key=lambda item: item[0],
    )
    timeline_times = [updated_at_ms for updated_at_ms, _ in network_timeline]
    # When several probes share a frame_seq, the last one wins.
    probes_by_seq = {
        int(record["frame_seq"]): record
        for record in display_probe_records
        if record.get("frame_seq") is not None
    }
    estimates: list[dict[str, Any]] = []

    for record in frame_recv_records:
        frame_seq = record.get("frame_seq")
        if frame_seq is None:
            continue
        probe = probes_by_seq.get(int(frame_seq))
        backend_received_unix_ns = record.get("backend_received_unix_ns")
        backend_received_unix_ms = None
        try:
            if backend_received_unix_ns is not None:
                # Integer division: going through float would round
                # nanosecond-scale epoch values (> 2**53) before dividing.
                backend_received_unix_ms = int(backend_received_unix_ns) // 1_000_000
        except (TypeError, ValueError, OverflowError):
            backend_received_unix_ms = None

        latency_estimate: dict[str, Any] = {}
        if backend_received_unix_ms is not None:
            # Rightmost sample with updated_at <= receive time. A binary search
            # per record (instead of a forward-only cursor) keeps the match
            # correct when records are not sorted by receive time.
            position = bisect_right(timeline_times, backend_received_unix_ms) - 1
            if position >= 0:
                latency_estimate = network_timeline[position][1]

        network_oneway = latency_estimate.get("video_network_oneway_est_ms")
        capture_to_send = record.get("b_side_capture_to_send_ms")
        partial_est = None
        if capture_to_send is not None or network_oneway is not None:
            partial_est = round(float(capture_to_send or 0.0) + float(network_oneway or 0.0), 3)
        request_to_paint_ms = None
        if probe is not None and probe.get("request_to_paint_ms") is not None:
            request_to_paint_ms = round(float(probe["request_to_paint_ms"]), 3)
        elif probe is not None and probe.get("request_started_unix_ms") is not None and probe.get("paint_unix_ms") is not None:
            # Fall back to deriving it from the two raw probe timestamps.
            request_to_paint_ms = round(float(probe["paint_unix_ms"]) - float(probe["request_started_unix_ms"]), 3)
        video_e2e_est_ms = round(partial_est + request_to_paint_ms, 3) if partial_est is not None and request_to_paint_ms is not None else None
        estimates.append(
            {
                "frame_seq": frame_seq,
                "backend_received_unix_ns": record.get("backend_received_unix_ns"),
                "frame_hash": record.get("frame_hash"),
                "estimate_method": "capture_to_send+srtt/2+request_to_paint" if video_e2e_est_ms is not None else "capture_to_send+srtt/2",
                "video_network_oneway_est_ms": network_oneway,
                "b_side_capture_to_send_ms": capture_to_send,
                "request_to_paint_ms": request_to_paint_ms,
                "response_to_paint_ms": probe.get("response_to_paint_ms") if probe is not None else None,
                "backend_to_request_ms": probe.get("backend_to_request_ms") if probe is not None else None,
                "backend_to_request_ms_raw": probe.get("backend_to_request_ms_raw") if probe is not None else None,
                "backend_to_paint_ms": probe.get("backend_to_paint_ms") if probe is not None else None,
                "backend_to_paint_ms_raw": probe.get("backend_to_paint_ms_raw") if probe is not None else None,
                "browser_backend_clock_offset_ms": probe.get("browser_backend_clock_offset_ms") if probe is not None else None,
                "browser_backend_clock_rtt_ms": probe.get("browser_backend_clock_rtt_ms") if probe is not None else None,
                "video_partial_est_ms": partial_est,
                "video_e2e_est_ms": video_e2e_est_ms,
                "sequence_gap": record.get("sequence_gap"),
                "repeat_flag": record.get("repeat_flag"),
                "sender_clock_delta_ms_raw": record.get("sender_clock_delta_ms_raw"),
            }
        )
    return estimates
|
||||||
|
|
||||||
|
|
||||||
|
def write_html_summary(
    path: Path,
    *,
    net_epochs: list[dict[str, Any]],
    control_estimates: list[dict[str, Any]],
    video_estimates: list[dict[str, Any]],
) -> None:
    """Write a static HTML page showing the most recent estimates.

    Only the last element of each list is displayed, next to the control and
    video sample counts. Values are HTML-escaped; a missing value is rendered
    as the text ``None``.
    """

    def newest(rows: list[dict[str, Any]]) -> dict[str, Any]:
        return rows[-1] if rows else {}

    def shown(row: dict[str, Any], key: str) -> str:
        return html.escape(str(row.get(key)))

    control = newest(control_estimates)
    video = newest(video_estimates)
    net = newest(net_epochs)
    page = f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Latency Estimates</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 24px; background: #0b1020; color: #eef2ff; }}
.grid {{ display: grid; grid-template-columns: repeat(3, minmax(0, 1fr)); gap: 16px; }}
.card {{ border: 1px solid #334155; border-radius: 8px; padding: 16px; background: #111827; }}
h1, h2 {{ margin-top: 0; }}
p {{ margin: 6px 0; line-height: 1.5; }}
code {{ color: #93c5fd; }}
</style>
</head>
<body>
<h1>Latency Estimates</h1>
<div class="grid">
<section class="card">
<h2>Control</h2>
<p><strong>loop RTT:</strong> {shown(control, "control_loop_rtt_ms")}</p>
<p><strong>to persist:</strong> {shown(control, "control_to_persist_est_ms")}</p>
<p><strong>method:</strong> {shown(control, "estimate_method")}</p>
<p><strong>samples:</strong> {len(control_estimates)}</p>
</section>
<section class="card">
<h2>Video</h2>
<p><strong>network one-way:</strong> {shown(video, "video_network_oneway_est_ms")}</p>
<p><strong>partial:</strong> {shown(video, "video_partial_est_ms")}</p>
<p><strong>end-to-end:</strong> {shown(video, "video_e2e_est_ms")}</p>
<p><strong>samples:</strong> {len(video_estimates)}</p>
</section>
<section class="card">
<h2>Net Epoch</h2>
<p><strong>a→d control srtt:</strong> {shown(net, "a_to_d_control_srtt_ms")}</p>
<p><strong>d→b control srtt:</strong> {shown(net, "d_to_b_control_srtt_ms")}</p>
<p><strong>a→d video srtt:</strong> {shown(net, "a_to_d_video_srtt_ms")}</p>
<p><strong>d→b video srtt:</strong> {shown(net, "d_to_b_video_srtt_ms")}</p>
</section>
</div>
</body>
</html>
"""
    path.write_text(page, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Aggregate one run directory into latency-estimate outputs.

    Reads the ``a-*.<id>.jsonl`` logs from ``--run-dir`` and writes three JSONL
    files plus ``latency-estimates.html`` into the output directory, which is
    created if needed and defaults to the run directory.

    Returns:
        Process exit status, always 0.
    """
    args = parse_args()
    run_dir = Path(args.run_dir).resolve()
    output_dir = run_dir if not args.output_dir else Path(args.output_dir).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    def load(stem: str) -> list[dict[str, Any]]:
        return load_glob_jsonl(run_dir, f"{stem}.*.jsonl")

    network_samples = load("a-network-summary")
    control_events = load("a-control-events")
    control_acks = load("a-control-acks")
    frame_recv_records = load("a-video-frame-recv")
    display_probe_records = load("a-video-display-probe")

    net_epochs = flatten_net_epoch(network_samples)
    control_estimates = aggregate_control_estimates(network_samples, control_events, control_acks)
    video_estimates = aggregate_video_estimates(network_samples, frame_recv_records, display_probe_records)

    jsonl_outputs = (
        ("net-epoch-summary.jsonl", net_epochs),
        ("control-latency-estimates.jsonl", control_estimates),
        ("video-latency-estimates.jsonl", video_estimates),
    )
    for filename, rows in jsonl_outputs:
        write_jsonl(output_dir / filename, rows)
    write_html_summary(
        output_dir / "latency-estimates.html",
        net_epochs=net_epochs,
        control_estimates=control_estimates,
        video_estimates=video_estimates,
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
111
scripts/dev/apply-camera-controls.sh
Normal file
111
scripts/dev/apply-camera-controls.sh
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
source "${SCRIPT_DIR}/load-env.sh"
|
||||||
|
|
||||||
|
camera_device="${OMNI_CAMERA_DEVICE}"
|
||||||
|
camera_profile="${OMNI_CAMERA_PROFILE}"
|
||||||
|
camera_brightness="${OMNI_CAMERA_BRIGHTNESS}"
|
||||||
|
camera_custom_ctrl="${OMNI_CAMERA_CUSTOM_CTRL}"
|
||||||
|
camera_verify="${OMNI_CAMERA_VERIFY}"
|
||||||
|
|
||||||
|
# Succeed (status 0) when $1 is one of the accepted "enabled" spellings.
# A missing or empty argument is treated as "0" and therefore fails.
is_truthy() {
  local flag="${1:-0}"
  local accepted

  for accepted in 1 true TRUE yes YES on ON; do
    if [[ "${flag}" == "${accepted}" ]]; then
      return 0
    fi
  done

  return 1
}
|
||||||
|
|
||||||
|
# Abort the script with an installation hint unless v4l2-ctl is on PATH.
require_v4l2_ctl() {
  if ! command -v v4l2-ctl >/dev/null 2>&1; then
    echo "Missing required command: v4l2-ctl. Install v4l-utils on the robot side before starting b_side_omnid." >&2
    exit 1
  fi

  return 0
}
|
||||||
|
|
||||||
|
# Run v4l2-ctl against the configured camera device, forwarding all arguments.
run_v4l2_ctl() {
  local -a ctl_args=(-d "${camera_device}" "$@")

  v4l2-ctl "${ctl_args[@]}"
}
|
||||||
|
|
||||||
|
# Log and apply one control assignment.
#   $1  value handed to v4l2-ctl --set-ctrl (e.g. "gain=64")
set_ctrl() {
  local assignment="$1"

  printf '%s\n' "[camera-controls] set ${camera_device} ${assignment}"
  run_v4l2_ctl "--set-ctrl=${assignment}"
}
|
||||||
|
|
||||||
|
# Log and read back one control so its current value appears in the output.
#   $1  control name handed to v4l2-ctl --get-ctrl
verify_ctrl() {
  local control_name="$1"

  printf '%s\n' "[camera-controls] verify ${camera_device} ${control_name}"
  run_v4l2_ctl "--get-ctrl=${control_name}"
}
|
||||||
|
|
||||||
|
# Step 1: validate the profile and decide whether v4l2-ctl is needed at all.
needs_v4l2_ctl=0

if [[ "${camera_profile}" == "night" ]]; then
  # The night profile always writes its fixed controls.
  needs_v4l2_ctl=1
elif [[ "${camera_profile}" == "day" ]]; then
  # The day profile only touches the camera when a brightness override is set.
  if [[ -n "${camera_brightness}" ]]; then
    needs_v4l2_ctl=1
  fi
elif [[ "${camera_profile}" == "custom" ]]; then
  if [[ -z "${camera_custom_ctrl}" ]]; then
    echo "OMNI_CAMERA_CUSTOM_CTRL must be non-empty when OMNI_CAMERA_PROFILE=custom." >&2
    exit 1
  fi
  needs_v4l2_ctl=1
else
  echo "Unsupported OMNI_CAMERA_PROFILE: ${camera_profile}. Expected one of: night, day, custom." >&2
  exit 1
fi

# Reading controls back also needs the tool, even when nothing is written.
if is_truthy "${camera_verify}"; then
  needs_v4l2_ctl=1
fi

if [[ "${needs_v4l2_ctl}" == "0" ]]; then
  echo "[camera-controls] profile=${camera_profile}; no camera controls requested"
  exit 0
fi

require_v4l2_ctl

# Step 2: apply the controls for the (already validated) profile.
if [[ "${camera_profile}" == "custom" ]]; then
  set_ctrl "${camera_custom_ctrl}"
else
  if [[ "${camera_profile}" == "night" ]]; then
    set_ctrl "auto_exposure=1"
    set_ctrl "exposure_time_absolute=800"
    set_ctrl "gain=64"
  fi
  # night and day both honour the optional brightness override.
  if [[ -n "${camera_brightness}" ]]; then
    set_ctrl "brightness=${camera_brightness}"
  fi
fi

# Step 3: optional read-back of the four standard controls.
if is_truthy "${camera_verify}"; then
  for ctrl_name in auto_exposure exposure_time_absolute gain brightness; do
    verify_ctrl "${ctrl_name}"
  done
fi
|
||||||
@@ -1,14 +1,31 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
LOAD_ENV_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
DEFAULT_OMNISOCKETGO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
|
DEFAULT_OMNISOCKETGO_ROOT="$(cd "${LOAD_ENV_SCRIPT_DIR}/../.." && pwd)"
|
||||||
|
|
||||||
die() {
|
die() {
|
||||||
echo "$*" >&2
|
echo "$*" >&2
|
||||||
return 1 2>/dev/null || exit 1
|
return 1 2>/dev/null || exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Strip one trailing carriage return from every project-prefixed shell variable
# and export the cleaned value. Variables without a trailing CR are untouched.
normalize_loaded_env_vars() {
  local name
  local current

  while IFS= read -r name; do
    case "${name}" in
      BACKEND_*|BLITZ_*|B_SIDE_*|CONTROL_*|FRONTEND_*|OMNI_*|PYTHON3_BIN|PYTHON_VENV_PATH|ROBOT_*|ROS_DISTRO|VITE_*)
        ;;
      *)
        continue
        ;;
    esac

    current="${!name}"
    if [[ "${current: -1}" == $'\r' ]]; then
      printf -v "${name}" '%s' "${current%$'\r'}"
      export "${name}"
    fi
  done < <(compgen -A variable)
}
|
||||||
|
|
||||||
is_omnisocketgo_root() {
|
is_omnisocketgo_root() {
|
||||||
local dir="$1"
|
local dir="$1"
|
||||||
[[ -f "${dir}/Makefile" && -f "${dir}/cmd/b_side_omnid.c" && -d "${dir}/ros-control-py" ]]
|
[[ -f "${dir}/Makefile" && -f "${dir}/cmd/b_side_omnid.c" && -d "${dir}/ros-control-py" ]]
|
||||||
@@ -19,11 +36,44 @@ is_robot_command_center_root() {
|
|||||||
[[ -f "${dir}/backend/config/asgi.py" && -f "${dir}/frontend/package.json" ]]
|
[[ -f "${dir}/backend/config/asgi.py" && -f "${dir}/frontend/package.json" ]]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Fail via die() unless ROBOT_COMMAND_CENTER_ROOT passes the repo-root check.
require_robot_command_center_root() {
  is_robot_command_center_root "${ROBOT_COMMAND_CENTER_ROOT}" && return 0

  die "ROBOT_COMMAND_CENTER_ROOT must point to the robot-command-center repo root. Current value: ${ROBOT_COMMAND_CENTER_ROOT}. Set it in ${LOAD_ENV_SCRIPT_DIR}/robot-remote.env.local if needed."
}
|
||||||
|
|
||||||
export OMNISOCKETGO_ROOT="${OMNISOCKETGO_ROOT:-${DEFAULT_OMNISOCKETGO_ROOT}}"
|
export OMNISOCKETGO_ROOT="${OMNISOCKETGO_ROOT:-${DEFAULT_OMNISOCKETGO_ROOT}}"
|
||||||
|
|
||||||
|
# Remember which OMNI_CAMERA_* variables were already set (even to an empty
# string) before the env files are sourced; the saved values are re-exported
# after loading so they take precedence over the env files.
omni_camera_device_was_set=0
omni_camera_profile_was_set=0
omni_camera_brightness_was_set=0
omni_camera_custom_ctrl_was_set=0
omni_camera_verify_was_set=0

if [[ -n "${OMNI_CAMERA_DEVICE+x}" ]]; then
  preserved_omni_camera_device="${OMNI_CAMERA_DEVICE}"
  omni_camera_device_was_set=1
fi
if [[ -n "${OMNI_CAMERA_PROFILE+x}" ]]; then
  preserved_omni_camera_profile="${OMNI_CAMERA_PROFILE}"
  omni_camera_profile_was_set=1
fi
if [[ -n "${OMNI_CAMERA_BRIGHTNESS+x}" ]]; then
  preserved_omni_camera_brightness="${OMNI_CAMERA_BRIGHTNESS}"
  omni_camera_brightness_was_set=1
fi
if [[ -n "${OMNI_CAMERA_CUSTOM_CTRL+x}" ]]; then
  preserved_omni_camera_custom_ctrl="${OMNI_CAMERA_CUSTOM_CTRL}"
  omni_camera_custom_ctrl_was_set=1
fi
if [[ -n "${OMNI_CAMERA_VERIFY+x}" ]]; then
  preserved_omni_camera_verify="${OMNI_CAMERA_VERIFY}"
  omni_camera_verify_was_set=1
fi
|
||||||
|
|
||||||
ENV_FILES=(
|
ENV_FILES=(
|
||||||
"${SCRIPT_DIR}/robot-remote.env"
|
"${LOAD_ENV_SCRIPT_DIR}/robot-remote.env"
|
||||||
"${SCRIPT_DIR}/robot-remote.env.local"
|
"${LOAD_ENV_SCRIPT_DIR}/robot-remote.env.local"
|
||||||
)
|
)
|
||||||
|
|
||||||
for env_file in "${ENV_FILES[@]}"; do
|
for env_file in "${ENV_FILES[@]}"; do
|
||||||
@@ -35,6 +85,24 @@ for env_file in "${ENV_FILES[@]}"; do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
normalize_loaded_env_vars
|
||||||
|
|
||||||
|
# Re-export the OMNI_CAMERA_* values captured before the env files were
# sourced, so a value supplied by the caller overrides the file contents.
[[ "${omni_camera_device_was_set}" != "1" ]] || export OMNI_CAMERA_DEVICE="${preserved_omni_camera_device}"
[[ "${omni_camera_profile_was_set}" != "1" ]] || export OMNI_CAMERA_PROFILE="${preserved_omni_camera_profile}"
[[ "${omni_camera_brightness_was_set}" != "1" ]] || export OMNI_CAMERA_BRIGHTNESS="${preserved_omni_camera_brightness}"
[[ "${omni_camera_custom_ctrl_was_set}" != "1" ]] || export OMNI_CAMERA_CUSTOM_CTRL="${preserved_omni_camera_custom_ctrl}"
[[ "${omni_camera_verify_was_set}" != "1" ]] || export OMNI_CAMERA_VERIFY="${preserved_omni_camera_verify}"
|
||||||
|
|
||||||
export OMNISOCKETGO_ROOT="${OMNISOCKETGO_ROOT:-${DEFAULT_OMNISOCKETGO_ROOT}}"
|
export OMNISOCKETGO_ROOT="${OMNISOCKETGO_ROOT:-${DEFAULT_OMNISOCKETGO_ROOT}}"
|
||||||
export ROBOT_COMMAND_CENTER_ROOT="${ROBOT_COMMAND_CENTER_ROOT:-$(dirname "${OMNISOCKETGO_ROOT}")/robot-command-center}"
|
export ROBOT_COMMAND_CENTER_ROOT="${ROBOT_COMMAND_CENTER_ROOT:-$(dirname "${OMNISOCKETGO_ROOT}")/robot-command-center}"
|
||||||
|
|
||||||
@@ -42,10 +110,6 @@ if ! is_omnisocketgo_root "${OMNISOCKETGO_ROOT}"; then
|
|||||||
die "OMNISOCKETGO_ROOT must point to the OmniSocketGo repo root. Current value: ${OMNISOCKETGO_ROOT}"
|
die "OMNISOCKETGO_ROOT must point to the OmniSocketGo repo root. Current value: ${OMNISOCKETGO_ROOT}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! is_robot_command_center_root "${ROBOT_COMMAND_CENTER_ROOT}"; then
|
|
||||||
die "ROBOT_COMMAND_CENTER_ROOT must point to the robot-command-center repo root. Current value: ${ROBOT_COMMAND_CENTER_ROOT}. Set it in ${SCRIPT_DIR}/robot-remote.env.local if needed."
|
|
||||||
fi
|
|
||||||
|
|
||||||
export BACKEND_DIR="${BACKEND_DIR:-${ROBOT_COMMAND_CENTER_ROOT}/backend}"
|
export BACKEND_DIR="${BACKEND_DIR:-${ROBOT_COMMAND_CENTER_ROOT}/backend}"
|
||||||
export FRONTEND_DIR="${FRONTEND_DIR:-${ROBOT_COMMAND_CENTER_ROOT}/frontend}"
|
export FRONTEND_DIR="${FRONTEND_DIR:-${ROBOT_COMMAND_CENTER_ROOT}/frontend}"
|
||||||
export ROS_CONTROL_PY_DIR="${ROS_CONTROL_PY_DIR:-${OMNISOCKETGO_ROOT}/ros-control-py}"
|
export ROS_CONTROL_PY_DIR="${ROS_CONTROL_PY_DIR:-${OMNISOCKETGO_ROOT}/ros-control-py}"
|
||||||
@@ -55,6 +119,13 @@ export BACKEND_HOST="${BACKEND_HOST:-0.0.0.0}"
|
|||||||
export BACKEND_PORT="${BACKEND_PORT:-8001}"
|
export BACKEND_PORT="${BACKEND_PORT:-8001}"
|
||||||
export FRONTEND_HOST="${FRONTEND_HOST:-0.0.0.0}"
|
export FRONTEND_HOST="${FRONTEND_HOST:-0.0.0.0}"
|
||||||
export FRONTEND_PORT="${FRONTEND_PORT:-5173}"
|
export FRONTEND_PORT="${FRONTEND_PORT:-5173}"
|
||||||
|
export OMNISOCKET_TELEMETRY_PEER_ID="${OMNISOCKET_TELEMETRY_PEER_ID:-peer-a-telemetry}"
|
||||||
|
export OMNISOCKET_TELEMETRY_INTERVAL_MS="${OMNISOCKET_TELEMETRY_INTERVAL_MS:-1000}"
|
||||||
|
export OMNISOCKET_TELEMETRY_STALE_AFTER_MS="${OMNISOCKET_TELEMETRY_STALE_AFTER_MS:-3000}"
|
||||||
|
export OMNI_NETWORK_SUMMARY_LOG_ENABLED="${OMNI_NETWORK_SUMMARY_LOG_ENABLED:-1}"
|
||||||
|
export OMNI_NETWORK_SUMMARY_LOG_PATH="${OMNI_NETWORK_SUMMARY_LOG_PATH:-${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl}"
|
||||||
|
export OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS="${OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS:-1000}"
|
||||||
|
export OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC="${OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC:-3}"
|
||||||
export CONTROL_SIDE_OMNISOCKET_SERVER_ADDR="${CONTROL_SIDE_OMNISOCKET_SERVER_ADDR:-}"
|
export CONTROL_SIDE_OMNISOCKET_SERVER_ADDR="${CONTROL_SIDE_OMNISOCKET_SERVER_ADDR:-}"
|
||||||
export CONTROL_SIDE_OMNISOCKET_RELAY_VIA="${CONTROL_SIDE_OMNISOCKET_RELAY_VIA:-}"
|
export CONTROL_SIDE_OMNISOCKET_RELAY_VIA="${CONTROL_SIDE_OMNISOCKET_RELAY_VIA:-}"
|
||||||
export ROBOT_SIDE_OMNISOCKET_SERVER_ADDR="${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}"
|
export ROBOT_SIDE_OMNISOCKET_SERVER_ADDR="${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}"
|
||||||
@@ -70,9 +141,184 @@ export ROBOT_RECEIVER_OUTPUT_TOPIC="${ROBOT_RECEIVER_OUTPUT_TOPIC:-/hric/robot/c
|
|||||||
export ROBOT_RECEIVER_FRAME_ID="${ROBOT_RECEIVER_FRAME_ID:-pelvis}"
|
export ROBOT_RECEIVER_FRAME_ID="${ROBOT_RECEIVER_FRAME_ID:-pelvis}"
|
||||||
export ROBOT_RECEIVER_WATCHDOG_TIMEOUT="${ROBOT_RECEIVER_WATCHDOG_TIMEOUT:-0.5}"
|
export ROBOT_RECEIVER_WATCHDOG_TIMEOUT="${ROBOT_RECEIVER_WATCHDOG_TIMEOUT:-0.5}"
|
||||||
export ROBOT_RECEIVER_PUBLISH_RATE_HZ="${ROBOT_RECEIVER_PUBLISH_RATE_HZ:-100.0}"
|
export ROBOT_RECEIVER_PUBLISH_RATE_HZ="${ROBOT_RECEIVER_PUBLISH_RATE_HZ:-100.0}"
|
||||||
|
export OMNI_CAMERA_DEVICE="${OMNI_CAMERA_DEVICE:-/dev/video0}"
|
||||||
|
export OMNI_CAMERA_PROFILE="${OMNI_CAMERA_PROFILE:-night}"
|
||||||
|
export OMNI_CAMERA_BRIGHTNESS="${OMNI_CAMERA_BRIGHTNESS:-}"
|
||||||
|
export OMNI_CAMERA_CUSTOM_CTRL="${OMNI_CAMERA_CUSTOM_CTRL:-}"
|
||||||
|
export OMNI_CAMERA_VERIFY="${OMNI_CAMERA_VERIFY:-0}"
|
||||||
|
export OMNI_GPSD_HOST="${OMNI_GPSD_HOST:-127.0.0.1}"
|
||||||
export OMNI_VIDEO_SERVER_ADDR="${OMNI_VIDEO_SERVER_ADDR:-${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}}"
|
export OMNI_VIDEO_SERVER_ADDR="${OMNI_VIDEO_SERVER_ADDR:-${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}}"
|
||||||
export OMNI_VIDEO_RELAY_VIA="${OMNI_VIDEO_RELAY_VIA:-${ROBOT_SIDE_OMNISOCKET_RELAY_VIA:-}}"
|
export OMNI_VIDEO_RELAY_VIA="${OMNI_VIDEO_RELAY_VIA:-${ROBOT_SIDE_OMNISOCKET_RELAY_VIA:-}}"
|
||||||
export OMNI_CONTROL_SERVER_ADDR="${OMNI_CONTROL_SERVER_ADDR:-${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}}"
|
export OMNI_CONTROL_SERVER_ADDR="${OMNI_CONTROL_SERVER_ADDR:-${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}}"
|
||||||
export OMNI_CONTROL_RELAY_VIA="${OMNI_CONTROL_RELAY_VIA:-${ROBOT_SIDE_OMNISOCKET_RELAY_VIA:-}}"
|
export OMNI_CONTROL_RELAY_VIA="${OMNI_CONTROL_RELAY_VIA:-${ROBOT_SIDE_OMNISOCKET_RELAY_VIA:-}}"
|
||||||
export OMNI_CONTROL_UNIX_SOCKET_PATH="${OMNI_CONTROL_UNIX_SOCKET_PATH:-${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}}"
|
export OMNI_CONTROL_UNIX_SOCKET_PATH="${OMNI_CONTROL_UNIX_SOCKET_PATH:-${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}}"
|
||||||
|
export OMNI_CONTROL_ACK_PEER_ID="${OMNI_CONTROL_ACK_PEER_ID:-peer-b-ctrl-ack}"
|
||||||
|
export OMNI_CONTROL_ACK_TARGET_PEER="${OMNI_CONTROL_ACK_TARGET_PEER:-peer-a-ctrl-ack}"
|
||||||
export B_SIDE_OMNID_USE_SUDO="${B_SIDE_OMNID_USE_SUDO:-1}"
|
export B_SIDE_OMNID_USE_SUDO="${B_SIDE_OMNID_USE_SUDO:-1}"
|
||||||
|
export BLITZ_RUNTIME_DIR="${BLITZ_RUNTIME_DIR:-${OMNISOCKETGO_ROOT}/logs/runtime}"
|
||||||
|
export BLITZ_RUN_ROOT="${BLITZ_RUN_ROOT:-${OMNISOCKETGO_ROOT}/logs}"
|
||||||
|
export BLITZ_RUN_CONTEXT_FILE="${BLITZ_RUN_CONTEXT_FILE:-${BLITZ_RUNTIME_DIR}/run-context.env}"
|
||||||
|
export BLITZ_RUN_ID_FILE="${BLITZ_RUN_ID_FILE:-${BLITZ_RUNTIME_DIR}/run-id}"
|
||||||
|
export BLITZ_CURRENT_RUN_LINK="${BLITZ_CURRENT_RUN_LINK:-${BLITZ_RUN_ROOT}/current}"
|
||||||
|
export BLITZ_5G_INTERFACE="${BLITZ_5G_INTERFACE:-}"
|
||||||
|
export BLITZ_5G_MODEM_SUBNET="${BLITZ_5G_MODEM_SUBNET:-192.168.224.0/22}"
|
||||||
|
export BLITZ_5G_GATEWAY="${BLITZ_5G_GATEWAY:-192.168.225.1}"
|
||||||
|
export BLITZ_5G_ROUTE_TARGETS="${BLITZ_5G_ROUTE_TARGETS:-106.55.173.235}"
|
||||||
|
export BLITZ_5G_INFO_JSON="${BLITZ_5G_INFO_JSON:-${OMNISOCKETGO_ROOT}/scripts/boot/modem_network_info.json}"
|
||||||
|
export BLITZ_TIME_SERVER_IP="${BLITZ_TIME_SERVER_IP:-}"
|
||||||
|
export BLITZ_KCP_STATS_INTERVAL_MS="${BLITZ_KCP_STATS_INTERVAL_MS:-1000}"
|
||||||
|
export BLITZ_CONTROL_LATENCY_LOG_ENABLED="${BLITZ_CONTROL_LATENCY_LOG_ENABLED:-1}"
|
||||||
|
export BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD="${BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD:-100}"
|
||||||
|
export BLITZ_CONTROL_ACK_SAMPLE_MOD="${BLITZ_CONTROL_ACK_SAMPLE_MOD:-10}"
|
||||||
|
export BLITZ_VIDEO_STAGE_LOG_ENABLED="${BLITZ_VIDEO_STAGE_LOG_ENABLED:-1}"
|
||||||
|
export BLITZ_VIDEO_STAGE_LOG_SAMPLE_MOD="${BLITZ_VIDEO_STAGE_LOG_SAMPLE_MOD:-10}"
|
||||||
|
export BLITZ_5G_LINK_LOG_INTERVAL_SEC="${BLITZ_5G_LINK_LOG_INTERVAL_SEC:-5}"
|
||||||
|
export BLITZ_JSONL_FLUSH_INTERVAL_MS="${BLITZ_JSONL_FLUSH_INTERVAL_MS:-1000}"
|
||||||
|
export BLITZ_JSONL_FLUSH_BYTES="${BLITZ_JSONL_FLUSH_BYTES:-262144}"
|
||||||
|
export BLITZ_JSONL_ROTATE_BYTES="${BLITZ_JSONL_ROTATE_BYTES:-134217728}"
|
||||||
|
export BLITZ_JSONL_ROTATE_FILES="${BLITZ_JSONL_ROTATE_FILES:-8}"
|
||||||
|
|
||||||
|
# Print the current UTC time as a compact stamp, e.g. 20240131T235959Z.
blitz_dev_utc_compact_timestamp() {
  local stamp_format='+%Y%m%dT%H%M%SZ'

  date -u "${stamp_format}"
}
|
||||||
|
|
||||||
|
# Print the HEAD commit of the OmniSocketGo checkout. Always returns 0, even
# when git is missing or the directory is not a repository.
blitz_dev_git_commit() {
  if ! git -C "${OMNISOCKETGO_ROOT}" rev-parse HEAD 2>/dev/null; then
    return 0
  fi
}
|
||||||
|
|
||||||
|
# Print "0" when the checkout at OMNISOCKETGO_ROOT matches its HEAD commit and
# "1" otherwise. "1" is also printed when git fails (not a repository, git
# missing, no commit yet).
#
# The comparison is made against HEAD rather than the index so that staged but
# uncommitted changes count as dirty too; otherwise the commit recorded in the
# run info would not describe the code that actually ran. Untracked files are
# not considered.
blitz_dev_git_dirty_flag() {
  if git -C "${OMNISOCKETGO_ROOT}" diff --quiet --ignore-submodules=dirty HEAD -- >/dev/null 2>&1; then
    printf '0\n'
    return 0
  fi
  printf '1\n'
}
|
||||||
|
|
||||||
|
# Create the runtime directory and the runs/ and incidents/ directories under
# the run root (no error if they already exist).
blitz_dev_prepare_dirs() {
  local -a required_dirs=(
    "${BLITZ_RUNTIME_DIR}"
    "${BLITZ_RUN_ROOT}/runs"
    "${BLITZ_RUN_ROOT}/incidents"
  )

  mkdir -p "${required_dirs[@]}"
}
|
||||||
|
|
||||||
|
# Write <run_dir>/run-info.json describing the run: ids, start time, host,
# git commit/dirty state and every BLITZ_*, OMNI_* and ROBOT_RECEIVER_*
# environment variable. The JSON is written to a temporary file first and then
# renamed into place.
#   $1  run directory
#   $2  run id
#   $3  boot id
blitz_dev_write_run_info() {
  local run_dir="$1"
  local run_id="$2"
  local boot_id="$3"
  local final_info="${run_dir}/run-info.json"
  local tmp_info="${final_info}.tmp.$$"
  local started_at commit_hash dirty_flag

  started_at="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  commit_hash="$(blitz_dev_git_commit)"
  dirty_flag="$(blitz_dev_git_dirty_flag)"

  # The heredoc body and its terminator stay at column 0: the quoted delimiter
  # passes the text to Python verbatim.
  python3 - "${tmp_info}" "${run_id}" "${run_dir}" "${boot_id}" "${started_at}" "${commit_hash}" "${dirty_flag}" "${HOSTNAME:-$(hostname)}" <<'PY'
import json
import os
import sys

(out_path, run_id, run_dir, boot_id, started_at,
 commit_hash, dirty_flag, hostname) = sys.argv[1:9]

tracked_prefixes = ("BLITZ_", "OMNI_", "ROBOT_RECEIVER_")
env_snapshot = {}
for name in sorted(os.environ):
    if name.startswith(tracked_prefixes):
        env_snapshot[name] = os.environ.get(name, "")

document = {
    "run_id": run_id,
    "run_dir": run_dir,
    "boot_id": boot_id,
    "started_at": started_at,
    "hostname": hostname,
    "git_commit": commit_hash,
    "git_dirty": dirty_flag == "1",
    "env": env_snapshot,
}
with open(out_path, "w", encoding="utf-8") as handle:
    json.dump(document, handle, ensure_ascii=False, indent=2, sort_keys=True)
PY
  mv -f "${tmp_info}" "${final_info}"
}
|
||||||
|
|
||||||
|
# Create a new run: make its directory, export BLITZ_RUN_ID / BLITZ_RUN_DIR /
# BLITZ_BOOT_ID, persist the run id and the run-context file, repoint the
# "current" symlink, and write run-info.json.
#   $1  optional run id; defaults to the current compact UTC timestamp
#
# The context file is later read with `source`, so each value is written with
# printf %q. Values made only of ordinary path characters are emitted
# unchanged; values containing spaces or shell metacharacters are escaped
# instead of being split or executed when the file is sourced.
blitz_dev_init_run_context() {
  local run_id
  local boot_id
  local run_dir
  local tmp_context="${BLITZ_RUN_CONTEXT_FILE}.tmp.$$"

  # Assign separately from `local` so a failing timestamp command is not masked.
  run_id="${1:-$(blitz_dev_utc_compact_timestamp)}"
  boot_id="dev-$(blitz_dev_utc_compact_timestamp)"
  run_dir="${BLITZ_RUN_ROOT}/runs/${run_id}"

  blitz_dev_prepare_dirs
  mkdir -p "${run_dir}"
  export BLITZ_RUN_ID="${run_id}"
  export BLITZ_RUN_DIR="${run_dir}"
  export BLITZ_BOOT_ID="${boot_id}"
  printf '%s\n' "${run_id}" > "${BLITZ_RUN_ID_FILE}"

  # Write to a temporary file and rename it so readers never see a partial file.
  {
    printf 'BLITZ_RUN_ID=%q\n' "${run_id}"
    printf 'BLITZ_RUN_DIR=%q\n' "${run_dir}"
    printf 'BLITZ_BOOT_ID=%q\n' "${boot_id}"
    printf 'BLITZ_RUN_ROOT=%q\n' "${BLITZ_RUN_ROOT}"
  } > "${tmp_context}"
  mv -f "${tmp_context}" "${BLITZ_RUN_CONTEXT_FILE}"

  ln -sfn "${run_dir}" "${BLITZ_CURRENT_RUN_LINK}"
  blitz_dev_write_run_info "${run_dir}" "${run_id}" "${boot_id}"
}
|
||||||
|
|
||||||
|
# Source the run-context file and export every variable it assigns.
# Returns 1 without side effects when the file does not exist.
blitz_dev_load_run_context() {
  [[ -f "${BLITZ_RUN_CONTEXT_FILE}" ]] || return 1

  # allexport is on only while the file is sourced.
  set -a
  # shellcheck disable=SC1090
  source "${BLITZ_RUN_CONTEXT_FILE}"
  set +a
}
|
||||||
|
|
||||||
|
# Reuse the persisted run context when one can be loaded; otherwise create a
# fresh run.
blitz_dev_ensure_run_context() {
  blitz_dev_load_run_context || blitz_dev_init_run_context
}
|
||||||
|
|
||||||
|
# Delete the persisted run-context and run-id files, then start a new run.
blitz_dev_reset_run_context() {
  local -a stale_files=("${BLITZ_RUN_CONTEXT_FILE}" "${BLITZ_RUN_ID_FILE}")

  rm -f "${stale_files[@]}"
  blitz_dev_init_run_context
}
|
||||||
|
|
||||||
|
# Export BLITZ_INSTANCE_ID as "<compact UTC timestamp>-<shell pid>" unless it
# already has a non-empty value.
blitz_dev_init_instance_context() {
  if [[ -n "${BLITZ_INSTANCE_ID:-}" ]]; then
    return 0
  fi

  export BLITZ_INSTANCE_ID="$(blitz_dev_utc_compact_timestamp)-$$"
}
|
||||||
|
|
||||||
|
# Print the per-instance JSONL log path for one component:
#   <BLITZ_RUN_DIR>/<stem>.<BLITZ_INSTANCE_ID>.jsonl
#   $1  component stem, e.g. "a-control-events"
blitz_dev_component_log_path() {
  local file_name="${1}.${BLITZ_INSTANCE_ID}.jsonl"

  printf '%s\n' "${BLITZ_RUN_DIR}/${file_name}"
}
|
||||||
|
|
||||||
|
# Export the log-path variables used on the A (backend) side.
# The network-summary path is replaced by a per-instance path only when it
# still holds the shared default; each remaining variable keeps a non-empty
# value set by the caller and otherwise receives a per-instance path.
blitz_dev_prepare_backend_logging_env() {
  local shared_default="${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl"
  local entry var_name stem

  blitz_dev_init_instance_context

  if [[ "${OMNI_NETWORK_SUMMARY_LOG_PATH}" == "${shared_default}" ]]; then
    export OMNI_NETWORK_SUMMARY_LOG_PATH
    OMNI_NETWORK_SUMMARY_LOG_PATH="$(blitz_dev_component_log_path "a-network-summary")"
  fi

  # Each entry is "<variable>:<log stem>".
  for entry in \
    "BLITZ_A_CONTROL_EVENTS_LOG_PATH:a-control-events" \
    "BLITZ_A_CONTROL_ACKS_LOG_PATH:a-control-acks" \
    "BLITZ_A_VIDEO_FRAME_RECV_LOG_PATH:a-video-frame-recv" \
    "BLITZ_A_VIDEO_DISPLAY_PROBE_LOG_PATH:a-video-display-probe"; do
    var_name="${entry%%:*}"
    stem="${entry#*:}"
    if [[ -z "${!var_name:-}" ]]; then
      printf -v "${var_name}" '%s' "$(blitz_dev_component_log_path "${stem}")"
    fi
    export "${var_name}"
  done
}
|
||||||
|
|
||||||
|
# Export the log-path variables used on the B side. Each variable keeps a
# non-empty value set by the caller and otherwise receives a per-instance path.
blitz_dev_prepare_bside_logging_env() {
  local entry var_name stem

  blitz_dev_init_instance_context

  # Each entry is "<variable>:<log stem>".
  for entry in \
    "BLITZ_KCP_STATS_LOG_PATH:b-kcp-session-stats" \
    "BLITZ_CONTROL_LATENCY_LOG_PATH:b-control-latency" \
    "BLITZ_VIDEO_STAGE_LOG_PATH:b-video-frame-stages"; do
    var_name="${entry%%:*}"
    stem="${entry#*:}"
    if [[ -z "${!var_name:-}" ]]; then
      printf -v "${var_name}" '%s' "$(blitz_dev_component_log_path "${stem}")"
    fi
    export "${var_name}"
  done
}
|
||||||
|
|
||||||
|
# Export BLITZ_5G_LINK_LOG_PATH, keeping a non-empty value set by the caller
# and otherwise using the per-instance "b-5g-link-quality" log path.
blitz_dev_prepare_5g_logging_env() {
  blitz_dev_init_instance_context

  if [[ -n "${BLITZ_5G_LINK_LOG_PATH:-}" ]]; then
    export BLITZ_5G_LINK_LOG_PATH
    return 0
  fi

  export BLITZ_5G_LINK_LOG_PATH="$(blitz_dev_component_log_path "b-5g-link-quality")"
}
|
||||||
|
|
||||||
|
# When any of the opt-out switches is "1", only pick up an existing run
# context (best effort); otherwise make sure a run context exists.
if [[ "${BLITZ_SKIP_DEV_RUN_CONTEXT_INIT:-0}" == "1" || "${BLITZ_BOOT_LOADING_ENV:-0}" == "1" || "${OMNI_BOOT_MODE:-0}" == "1" ]]; then
  if [[ -f "${BLITZ_RUN_CONTEXT_FILE}" ]]; then
    blitz_dev_load_run_context || true
  fi
else
  blitz_dev_ensure_run_context
fi
|
||||||
|
|||||||
100
scripts/dev/log-network-summary.py
Normal file
100
scripts/dev/log-network-summary.py
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
STOP_REQUESTED = False
|
||||||
|
|
||||||
|
|
||||||
|
def handle_signal(signum: int, frame: object) -> None:
    """Signal handler: set the module-level STOP_REQUESTED flag.

    Both arguments are required by the signal-handler signature and ignored.
    """
    global STOP_REQUESTED
    _ = (signum, frame)
    STOP_REQUESTED = True
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse the command line of the network-summary logger.

    Options: ``--url`` and ``--output`` (both required), ``--interval-ms``
    (int, default 2000) and ``--request-timeout-sec`` (float, default 3.0).
    """
    parser = argparse.ArgumentParser(description="Poll /api/network/latest/ and append JSONL snapshots.")

    required_options = (
        ("--url", "HTTP endpoint that returns the network summary JSON."),
        ("--output", "Output JSONL path."),
    )
    for flag, help_text in required_options:
        parser.add_argument(flag, required=True, help=help_text)

    tunable_options = (
        ("--interval-ms", int, 2000, "Polling interval in milliseconds. Default: 2000."),
        ("--request-timeout-sec", float, 3.0, "Single request timeout in seconds. Default: 3.0."),
    )
    for flag, value_type, default_value, help_text in tunable_options:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def sleep_with_stop(seconds: float) -> None:
    """Sleep for up to ``seconds``, returning early once STOP_REQUESTED is set.

    The wait is cut into slices of at most 0.2 s so the flag is re-checked
    regularly. Negative durations are treated as zero.
    """
    wake_at = time.monotonic() + max(0.0, seconds)
    while True:
        if STOP_REQUESTED:
            return
        left = wake_at - time.monotonic()
        if left <= 0.0:
            return
        time.sleep(min(left, 0.2))
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_json(url: str, timeout_sec: float) -> str:
    """GET ``url`` and return its JSON body re-serialised as one compact line.

    The body is decoded with the charset announced in the response headers
    (UTF-8 when none is given), parsed, and dumped without extra whitespace
    and without ASCII-escaping, so the result fits on a single JSONL line.
    Network, decoding and JSON errors propagate to the caller.
    """
    request_headers = {
        "Accept": "application/json",
        "Cache-Control": "no-cache",
    }
    request = urllib.request.Request(url, headers=request_headers, method="GET")

    with urllib.request.urlopen(request, timeout=timeout_sec) as response:
        encoding = response.headers.get_content_charset("utf-8")
        body_text = response.read().decode(encoding)

    document = json.loads(body_text)
    return json.dumps(document, separators=(",", ":"), ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Poll the summary endpoint until SIGINT/SIGTERM, appending one JSONL line per success.

    The polling interval is clamped to at least 200 ms. A failed poll is
    reported on stderr at most once every 10 seconds and never stops the
    loop; the output file is flushed after every successful poll.

    Returns:
        0 once a stop signal has been received.
    """
    # Local import: only this function needs the http.client exception types.
    import http.client

    args = parse_args()
    interval_sec = max(args.interval_ms, 200) / 1000.0
    output_path = Path(args.output)
    # None means "no failure reported yet", so the first failure is always
    # printed regardless of the current monotonic clock value.
    last_error_log_monotonic = None

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Everything a single poll can raise at runtime:
    #  - OSError: URLError/HTTPError, timeouts and connection resets, including
    #    those raised while the response body is being read
    #  - http.client.HTTPException: truncated or malformed HTTP responses
    #  - UnicodeDecodeError / LookupError: body not decodable / unknown charset
    #  - json.JSONDecodeError: body is not valid JSON
    poll_errors = (
        OSError,
        http.client.HTTPException,
        UnicodeDecodeError,
        LookupError,
        json.JSONDecodeError,
    )

    with output_path.open("a", encoding="utf-8") as output_file:
        while not STOP_REQUESTED:
            started = time.monotonic()
            try:
                line = fetch_json(args.url, args.request_timeout_sec)
            except poll_errors as error:
                now = time.monotonic()
                if last_error_log_monotonic is None or now - last_error_log_monotonic >= 10.0:
                    print(f"[network-summary] poll failed: {error}", file=sys.stderr)
                    last_error_log_monotonic = now
            else:
                # Write errors are deliberately not caught: they happen outside the try body.
                output_file.write(line)
                output_file.write("\n")
                output_file.flush()

            elapsed = time.monotonic() - started
            sleep_with_stop(max(0.0, interval_sec - elapsed))

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
10
scripts/dev/reset-run-context.sh
Normal file
10
scripts/dev/reset-run-context.sh
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
#!/usr/bin/env bash
# Start a fresh dev run context and print its run id and run directory.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Keep load-env.sh from auto-initialising a run context; the reset below
# creates the new one explicitly.
BLITZ_SKIP_DEV_RUN_CONTEXT_INIT="1"
export BLITZ_SKIP_DEV_RUN_CONTEXT_INIT

# shellcheck disable=SC1091
source "${SCRIPT_DIR}/load-env.sh"

blitz_dev_reset_run_context
printf 'run_id=%s\nrun_dir=%s\n' "${BLITZ_RUN_ID}" "${BLITZ_RUN_DIR}"
|
||||||
@@ -5,11 +5,13 @@
|
|||||||
# Example:
|
# Example:
|
||||||
# ROBOT_COMMAND_CENTER_ROOT="$HOME/Documents/robot-command-center"
|
# ROBOT_COMMAND_CENTER_ROOT="$HOME/Documents/robot-command-center"
|
||||||
|
|
||||||
CONTROL_SIDE_OMNISOCKET_SERVER_ADDR="81.70.156.140:10909"
|
CONTROL_SIDE_OMNISOCKET_SERVER_ADDR="81.70.156.140:10909" # D
|
||||||
CONTROL_SIDE_OMNISOCKET_RELAY_VIA="81.70.156.140:10909"
|
CONTROL_SIDE_OMNISOCKET_RELAY_VIA="106.55.173.235:10909" # C
|
||||||
|
|
||||||
ROBOT_SIDE_OMNISOCKET_SERVER_ADDR="81.70.156.140:10909"
|
ROBOT_SIDE_OMNISOCKET_SERVER_ADDR="81.70.156.140:10909" # D
|
||||||
ROBOT_SIDE_OMNISOCKET_RELAY_VIA="106.55.173.235:10909"
|
ROBOT_SIDE_OMNISOCKET_RELAY_VIA="81.70.156.140:10909" # 直连 D
|
||||||
|
# Log one normal relay packet out of every N packets. Drop events still log immediately.
|
||||||
|
OMNI_RELAY_PACKET_LOG_SAMPLE_EVERY="200"
|
||||||
|
|
||||||
CONTROL_WS_ALLOWED_ORIGINS="http://127.0.0.1:5173,http://localhost:5173"
|
CONTROL_WS_ALLOWED_ORIGINS="http://127.0.0.1:5173,http://localhost:5173"
|
||||||
VITE_API_BASE_URL="http://127.0.0.1:8001"
|
VITE_API_BASE_URL="http://127.0.0.1:8001"
|
||||||
@@ -19,6 +21,13 @@ PYTHON_VENV_PATH="${OMNISOCKETGO_ROOT}/.venv"
|
|||||||
|
|
||||||
BACKEND_HOST="0.0.0.0"
|
BACKEND_HOST="0.0.0.0"
|
||||||
BACKEND_PORT="8001"
|
BACKEND_PORT="8001"
|
||||||
|
OMNISOCKET_TELEMETRY_PEER_ID="peer-a-telemetry"
|
||||||
|
OMNISOCKET_TELEMETRY_INTERVAL_MS="1000"
|
||||||
|
OMNISOCKET_TELEMETRY_STALE_AFTER_MS="3000"
|
||||||
|
OMNI_NETWORK_SUMMARY_LOG_ENABLED="1"
|
||||||
|
OMNI_NETWORK_SUMMARY_LOG_PATH="${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl"
|
||||||
|
OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS="1000"
|
||||||
|
OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC="3"
|
||||||
|
|
||||||
FRONTEND_HOST="0.0.0.0"
|
FRONTEND_HOST="0.0.0.0"
|
||||||
FRONTEND_PORT="5173"
|
FRONTEND_PORT="5173"
|
||||||
@@ -37,13 +46,31 @@ ROBOT_RECEIVER_PUBLISH_RATE_HZ="100.0"
|
|||||||
|
|
||||||
OMNI_VIDEO_PEER_ID="peer-b-video"
|
OMNI_VIDEO_PEER_ID="peer-b-video"
|
||||||
OMNI_VIDEO_TARGET_PEER="peer-a-video"
|
OMNI_VIDEO_TARGET_PEER="peer-a-video"
|
||||||
OMNI_CAMERA_DEVICE="/dev/video26"
|
OMNI_GPSD_HOST="127.0.0.1"
|
||||||
|
OMNI_CAMERA_DEVICE="/dev/v4l/by-path/platform-a80aa10000.usb-usb-0:3.2:1.4-video-index0"
|
||||||
|
OMNI_CAMERA_PROFILE="day"
|
||||||
|
OMNI_CAMERA_BRIGHTNESS=""
|
||||||
|
OMNI_CAMERA_CUSTOM_CTRL=""
|
||||||
|
OMNI_CAMERA_VERIFY="0"
|
||||||
OMNI_VIDEO_SERVER_ADDR="${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR}"
|
OMNI_VIDEO_SERVER_ADDR="${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR}"
|
||||||
OMNI_VIDEO_RELAY_VIA="${ROBOT_SIDE_OMNISOCKET_RELAY_VIA}"
|
OMNI_VIDEO_RELAY_VIA="${ROBOT_SIDE_OMNISOCKET_RELAY_VIA}"
|
||||||
|
OMNI_VIDEO_SOFT_BACKPRESSURE_SEGMENTS="256"
|
||||||
|
OMNI_VIDEO_HARD_BACKPRESSURE_SEGMENTS="1024"
|
||||||
|
OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS="5000"
|
||||||
|
OMNI_VIDEO_FRAME_STALL_RECONNECT_MS="30000"
|
||||||
OMNI_CONTROL_PEER_ID="peer-b-ctrl"
|
OMNI_CONTROL_PEER_ID="peer-b-ctrl"
|
||||||
OMNI_CONTROL_EXPECTED_SENDER="peer-a-ctrl"
|
OMNI_CONTROL_EXPECTED_SENDER="peer-a-ctrl"
|
||||||
OMNI_CONTROL_SERVER_ADDR="${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR}"
|
OMNI_CONTROL_SERVER_ADDR="${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR}"
|
||||||
OMNI_CONTROL_RELAY_VIA="${ROBOT_SIDE_OMNISOCKET_RELAY_VIA}"
|
OMNI_CONTROL_RELAY_VIA="${ROBOT_SIDE_OMNISOCKET_RELAY_VIA}"
|
||||||
OMNI_CONTROL_UNIX_SOCKET_PATH="${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}"
|
OMNI_CONTROL_UNIX_SOCKET_PATH="${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}"
|
||||||
|
OMNI_CONTROL_ACK_PEER_ID="peer-b-ctrl-ack"
|
||||||
|
OMNI_CONTROL_ACK_TARGET_PEER="peer-a-ctrl-ack"
|
||||||
|
BLITZ_CONTROL_ACK_SAMPLE_MOD="10"
|
||||||
|
BLITZ_VIDEO_STAGE_LOG_ENABLED="1"
|
||||||
|
BLITZ_VIDEO_STAGE_LOG_SAMPLE_MOD="10"
|
||||||
|
OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS="30000"
|
||||||
|
|
||||||
|
# A-side backend video freshness guard. Used by scripts/dev/start-backend.sh.
|
||||||
|
OMNI_VIDEO_MAX_FRAME_AGE_MS="1000"
|
||||||
|
|
||||||
B_SIDE_OMNID_USE_SUDO="1"
|
B_SIDE_OMNID_USE_SUDO="1"
|
||||||
|
|||||||
9
scripts/dev/start-5g-link-logger.sh
Normal file
9
scripts/dev/start-5g-link-logger.sh
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
#!/usr/bin/env bash
# Dev wrapper: load the environment, set up the 5G link log path, then replace
# this process with the boot-time 5G link logger script.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/load-env.sh"

blitz_dev_prepare_5g_logging_env

logger_script="${OMNISOCKETGO_ROOT}/scripts/boot/blitz-5g-link-logger.sh"
exec bash "${logger_script}"
|
||||||
@@ -4,6 +4,7 @@ set -euo pipefail
|
|||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source "${SCRIPT_DIR}/load-env.sh"
|
source "${SCRIPT_DIR}/load-env.sh"
|
||||||
|
blitz_dev_prepare_bside_logging_env
|
||||||
|
|
||||||
cd "${OMNISOCKETGO_ROOT}"
|
cd "${OMNISOCKETGO_ROOT}"
|
||||||
|
|
||||||
@@ -14,12 +15,41 @@ export OMNI_VIDEO_RELAY_VIA="${OMNI_VIDEO_RELAY_VIA}"
|
|||||||
export OMNI_CONTROL_SERVER_ADDR="${OMNI_CONTROL_SERVER_ADDR}"
|
export OMNI_CONTROL_SERVER_ADDR="${OMNI_CONTROL_SERVER_ADDR}"
|
||||||
export OMNI_CONTROL_RELAY_VIA="${OMNI_CONTROL_RELAY_VIA}"
|
export OMNI_CONTROL_RELAY_VIA="${OMNI_CONTROL_RELAY_VIA}"
|
||||||
|
|
||||||
|
logger_pid=""
|
||||||
|
|
||||||
|
# Stop the background 5G link logger, if one was started by this script.
# Failures from kill/wait are ignored so the trap never aborts the shutdown.
cleanup() {
  if [[ -z "${logger_pid}" ]]; then
    return 0
  fi

  kill "${logger_pid}" 2>/dev/null || true
  wait "${logger_pid}" 2>/dev/null || true
}
|
||||||
|
|
||||||
|
# Launch start-5g-link-logger.sh in the background and remember its PID in
# logger_pid. Nothing is started when OMNI_BOOT_MODE is "1".
start_5g_link_logger_if_needed() {
  if [[ "${OMNI_BOOT_MODE:-0}" != "1" ]]; then
    bash "${SCRIPT_DIR}/start-5g-link-logger.sh" &
    logger_pid=$!
    echo "[start-b-side-omnid] 5G link logger -> ${BLITZ_5G_LINK_LOG_PATH:-unset}" >&2
  fi
}
|
||||||
|
|
||||||
if [[ ! -x "./bin/b_side_omnid" ]]; then
|
if [[ ! -x "./bin/b_side_omnid" ]]; then
|
||||||
|
if [[ "${OMNI_BOOT_MODE:-0}" == "1" ]]; then
|
||||||
|
echo "Missing ./bin/b_side_omnid in boot mode; build it before enabling the autostart service." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
make b_side_omnid
|
make b_side_omnid
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Run b_side_omnid in the foreground (not exec'd) so the cleanup trap can
# stop the background logger when the daemon exits or the script is signalled.
launch_b_side_omnid() {
  local omnid_bin="./bin/b_side_omnid"

  trap cleanup EXIT INT TERM

  # Optional background logger first, then camera controls, then the daemon.
  start_5g_link_logger_if_needed
  bash "${SCRIPT_DIR}/apply-camera-controls.sh"
  "${omnid_bin}"
}
|
||||||
|
|
||||||
if [[ "${B_SIDE_OMNID_USE_SUDO}" == "1" && "${EUID}" -ne 0 ]]; then
|
if [[ "${B_SIDE_OMNID_USE_SUDO}" == "1" && "${EUID}" -ne 0 ]]; then
|
||||||
exec sudo -E ./bin/b_side_omnid
|
exec sudo -E bash -lc 'cd "$1" && export B_SIDE_OMNID_USE_SUDO=0 && exec bash "$2"' _ "${OMNISOCKETGO_ROOT}" "${SCRIPT_DIR}/start-b-side-omnid.sh"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
exec ./bin/b_side_omnid
|
launch_b_side_omnid
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ set -euo pipefail
|
|||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source "${SCRIPT_DIR}/load-env.sh"
|
source "${SCRIPT_DIR}/load-env.sh"
|
||||||
|
require_robot_command_center_root
|
||||||
|
blitz_dev_prepare_backend_logging_env
|
||||||
|
|
||||||
if [[ ! -d "${PYTHON_VENV_PATH}" ]]; then
|
if [[ ! -d "${PYTHON_VENV_PATH}" ]]; then
|
||||||
"${PYTHON3_BIN}" -m venv "${PYTHON_VENV_PATH}"
|
"${PYTHON3_BIN}" -m venv "${PYTHON_VENV_PATH}"
|
||||||
@@ -15,4 +17,38 @@ source "${PYTHON_VENV_PATH}/bin/activate"
|
|||||||
cd "${BACKEND_DIR}"
|
cd "${BACKEND_DIR}"
|
||||||
export OMNISOCKET_SERVER_ADDR="${CONTROL_SIDE_OMNISOCKET_SERVER_ADDR}"
|
export OMNISOCKET_SERVER_ADDR="${CONTROL_SIDE_OMNISOCKET_SERVER_ADDR}"
|
||||||
export OMNISOCKET_RELAY_VIA="${CONTROL_SIDE_OMNISOCKET_RELAY_VIA}"
|
export OMNISOCKET_RELAY_VIA="${CONTROL_SIDE_OMNISOCKET_RELAY_VIA}"
|
||||||
exec python -m uvicorn config.asgi:application --host "${BACKEND_HOST}" --port "${BACKEND_PORT}"
|
|
||||||
|
logger_pid=""
|
||||||
|
|
||||||
|
# Stop the background network summary logger, if one was started.
# Failures from kill/wait are ignored so the trap never aborts the shutdown.
cleanup() {
  if [[ -z "${logger_pid}" ]]; then
    return 0
  fi

  kill "${logger_pid}" 2>/dev/null || true
  wait "${logger_pid}" 2>/dev/null || true
}
|
||||||
|
|
||||||
|
# Start log-network-summary.py in the background, polling the local backend's
# /api/network/latest/ endpoint, and record its PID in logger_pid.
# Does nothing unless OMNI_NETWORK_SUMMARY_LOG_ENABLED is "1".
start_network_summary_logger() {
  local logger_url logger_dir
  local -a logger_args

  if [[ "${OMNI_NETWORK_SUMMARY_LOG_ENABLED}" != "1" ]]; then
    return 0
  fi

  logger_url="http://127.0.0.1:${BACKEND_PORT}/api/network/latest/"

  # Make sure the directory that will hold the log file exists.
  logger_dir="$(dirname "${OMNI_NETWORK_SUMMARY_LOG_PATH}")"
  mkdir -p "${logger_dir}"

  logger_args=(
    --url "${logger_url}"
    --output "${OMNI_NETWORK_SUMMARY_LOG_PATH}"
    --interval-ms "${OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS}"
    --request-timeout-sec "${OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC}"
  )

  python "${SCRIPT_DIR}/log-network-summary.py" "${logger_args[@]}" &
  logger_pid=$!
  echo "[start-backend] network summary logger -> ${OMNI_NETWORK_SUMMARY_LOG_PATH} (${OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS} ms)" >&2
}
|
||||||
|
|
||||||
|
trap cleanup EXIT INT TERM
|
||||||
|
|
||||||
|
start_network_summary_logger
|
||||||
|
python -m uvicorn config.asgi:application --host "${BACKEND_HOST}" --port "${BACKEND_PORT}"
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ set -euo pipefail
|
|||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source "${SCRIPT_DIR}/load-env.sh"
|
source "${SCRIPT_DIR}/load-env.sh"
|
||||||
|
require_robot_command_center_root
|
||||||
|
|
||||||
cd "${FRONTEND_DIR}"
|
cd "${FRONTEND_DIR}"
|
||||||
exec npm run dev -- --host "${FRONTEND_HOST}" --port "${FRONTEND_PORT}"
|
exec npm run dev -- --host "${FRONTEND_HOST}" --port "${FRONTEND_PORT}"
|
||||||
|
|||||||
@@ -2,23 +2,49 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
|
||||||
|
# Source the file given as $1 with "nounset" temporarily disabled, then turn
# it back on. Used for the ROS setup scripts sourced below.
# NOTE(review): presumably those scripts reference unset variables - confirm.
source_with_nounset_off() {
  set +o nounset
  # shellcheck disable=SC1090
  source "$1"
  set -o nounset
}
|
||||||
|
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source "${SCRIPT_DIR}/load-env.sh"
|
source "${SCRIPT_DIR}/load-env.sh"
|
||||||
# shellcheck disable=SC1091
|
if [[ ! -f "/opt/ros/${ROS_DISTRO}/setup.bash" ]]; then
|
||||||
source "/opt/ros/${ROS_DISTRO}/setup.bash"
|
echo "Missing ROS distro setup: /opt/ros/${ROS_DISTRO}/setup.bash" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
source_with_nounset_off "/opt/ros/${ROS_DISTRO}/setup.bash"
|
||||||
|
|
||||||
cd "${ROS_CONTROL_PY_DIR}"
|
cd "${ROS_CONTROL_PY_DIR}"
|
||||||
# shellcheck disable=SC1091
|
if [[ ! -f "install/setup.bash" ]]; then
|
||||||
source "install/setup.bash"
|
echo "Missing ROS workspace setup: ${ROS_CONTROL_PY_DIR}/install/setup.bash" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
source_with_nounset_off "install/setup.bash"
|
||||||
|
|
||||||
exec ros2 launch udp_teleop_bridge robot_udp_receiver.launch.py \
|
launch_args=(
|
||||||
"transport:=${ROBOT_RECEIVER_TRANSPORT}" \
|
"transport:=${ROBOT_RECEIVER_TRANSPORT}"
|
||||||
"server_addr:=${ROBOT_RECEIVER_SERVER_ADDR}" \
|
"peer_id:=${ROBOT_RECEIVER_PEER_ID}"
|
||||||
"relay_via:=${ROBOT_RECEIVER_RELAY_VIA}" \
|
"local_socket_path:=${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}"
|
||||||
"peer_id:=${ROBOT_RECEIVER_PEER_ID}" \
|
"output_topic:=${ROBOT_RECEIVER_OUTPUT_TOPIC}"
|
||||||
"expected_sender:=${ROBOT_RECEIVER_EXPECTED_SENDER}" \
|
"frame_id:=${ROBOT_RECEIVER_FRAME_ID}"
|
||||||
"local_socket_path:=${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}" \
|
"watchdog_timeout:=${ROBOT_RECEIVER_WATCHDOG_TIMEOUT}"
|
||||||
"output_topic:=${ROBOT_RECEIVER_OUTPUT_TOPIC}" \
|
|
||||||
"frame_id:=${ROBOT_RECEIVER_FRAME_ID}" \
|
|
||||||
"watchdog_timeout:=${ROBOT_RECEIVER_WATCHDOG_TIMEOUT}" \
|
|
||||||
"publish_rate_hz:=${ROBOT_RECEIVER_PUBLISH_RATE_HZ}"
|
"publish_rate_hz:=${ROBOT_RECEIVER_PUBLISH_RATE_HZ}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if [[ -n "${ROBOT_RECEIVER_SERVER_ADDR}" ]]; then
|
||||||
|
launch_args+=("server_addr:=${ROBOT_RECEIVER_SERVER_ADDR}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ -n "${ROBOT_RECEIVER_RELAY_VIA}" ]]; then
|
||||||
|
launch_args+=("relay_via:=${ROBOT_RECEIVER_RELAY_VIA}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ -n "${ROBOT_RECEIVER_EXPECTED_SENDER}" ]]; then
|
||||||
|
launch_args+=("expected_sender:=${ROBOT_RECEIVER_EXPECTED_SENDER}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
exec ros2 launch udp_teleop_bridge robot_udp_receiver.launch.py "${launch_args[@]}"
|
||||||
|
|||||||
333
src/gps_buffer.c
Normal file
333
src/gps_buffer.c
Normal file
@@ -0,0 +1,333 @@
|
|||||||
|
#include "gps_buffer.h"

#include <errno.h> // errno is checked after strtod()
#include <limits.h>
#include <math.h>
#include <netdb.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
|
||||||
|
|
||||||
|
// Shared global state
|
||||||
|
static gps_video_sample_t g_current_gps_data = {0.0, 0.0};
|
||||||
|
static volatile int g_running = 0;
|
||||||
|
static pthread_t g_gps_thread;
|
||||||
|
static pthread_mutex_t g_gps_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
static double normalize_coordinate(double coordinate) {
|
||||||
|
return round(coordinate * 1000000.0) / 1000000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void store_gps(double latitude, double longitude) {
|
||||||
|
pthread_mutex_lock(&g_gps_mutex);
|
||||||
|
g_current_gps_data.latitude = normalize_coordinate(latitude);
|
||||||
|
g_current_gps_data.longitude = normalize_coordinate(longitude);
|
||||||
|
pthread_mutex_unlock(&g_gps_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void clear_gps(void) {
|
||||||
|
pthread_mutex_lock(&g_gps_mutex);
|
||||||
|
g_current_gps_data.latitude = 0.0;
|
||||||
|
g_current_gps_data.longitude = 0.0;
|
||||||
|
pthread_mutex_unlock(&g_gps_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
static gps_video_sample_t load_gps(void) {
|
||||||
|
gps_video_sample_t sample;
|
||||||
|
|
||||||
|
pthread_mutex_lock(&g_gps_mutex);
|
||||||
|
sample = g_current_gps_data;
|
||||||
|
pthread_mutex_unlock(&g_gps_mutex);
|
||||||
|
return sample;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gps_sleep_before_retry(void) {
|
||||||
|
int retry_ms = 1000;
|
||||||
|
int step_ms = 100;
|
||||||
|
int elapsed_ms = 0;
|
||||||
|
|
||||||
|
while (g_running && elapsed_ms < retry_ms) {
|
||||||
|
usleep((useconds_t) step_ms * 1000U);
|
||||||
|
elapsed_ms += step_ms;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将经纬度规范化为 double,保留 6 位小数。
|
||||||
|
static int normalize_gps(double latitude, double longitude, gps_video_sample_t* sample) {
|
||||||
|
if (!isfinite(latitude) || !isfinite(longitude)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// 过滤掉 0,0 这种无效坐标
|
||||||
|
if (fabs(latitude) < 1e-6 && fabs(longitude) < 1e-6) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sample == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
sample->latitude = normalize_coordinate(latitude);
|
||||||
|
sample->longitude = normalize_coordinate(longitude);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// =================================================================
|
||||||
|
// The low-level parsing helpers below are adapted from gps_parse.c
|
||||||
|
// =================================================================
|
||||||
|
|
||||||
|
/*
 * 1. Helper: locate the start of the value that follows `"key":` in a JSON
 *    string.
 *
 * This is a plain substring search for the first occurrence of `"key":`
 * (no whitespace before the colon); it does not understand nesting or string
 * escaping. Spaces and tabs after the colon are skipped.
 *
 * Returns a pointer into json at the first character of the value, or NULL
 * when json/key is NULL, the key is too long for the 64-byte pattern buffer,
 * or the pattern is not present.
 */
static const char* find_json_value(const char* json, const char* key) {
    char needle[64];
    const char* cursor;
    int needle_len;

    if (json == NULL || key == NULL) {
        return NULL;
    }

    /* Build the search pattern: "key": */
    needle_len = snprintf(needle, sizeof(needle), "\"%s\":", key);
    if (needle_len < 0 || (size_t)needle_len >= sizeof(needle)) {
        return NULL;
    }

    cursor = strstr(json, needle);
    if (cursor == NULL) {
        return NULL;
    }

    /* Step over the pattern, then over any blanks in front of the value. */
    for (cursor += needle_len; *cursor == ' ' || *cursor == '\t'; cursor++) {
    }

    return cursor;
}
|
||||||
|
|
||||||
|
/*
 * 2. Parser: read the numeric value stored under `key` as a double.
 *
 * Returns 1 and writes *value on success. Returns 0 (leaving *value
 * untouched) when the key is missing, the value does not start with a digit
 * or '-', strtod() reports an error through errno, nothing was consumed, or
 * the result is not finite. errno is reset to 0 before the conversion.
 */
static int json_extract_double(const char* json, const char* key, double* value) {
    const char* text = find_json_value(json, key);
    char* stop = NULL;
    double number;
    int looks_numeric;

    if (text == NULL) {
        return 0; /* key not present */
    }

    /* Only accept values that begin like a JSON number. */
    looks_numeric = (*text == '-') || (*text >= '0' && *text <= '9');
    if (!looks_numeric) {
        return 0;
    }

    errno = 0; /* so a stale error is not mistaken for a strtod failure */
    number = strtod(text, &stop);

    if (errno == 0 && stop != text && isfinite(number)) {
        *value = number;
        return 1;
    }
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * 3. Parser: read the numeric value stored under `key` as an int.
 *
 * The value is parsed as a double by json_extract_double() and truncated
 * toward zero. Returns 1 and writes *value on success. Returns 0 (leaving
 * *value untouched) when value is NULL, when the key cannot be parsed, or
 * when the number lies outside [INT_MIN, INT_MAX] - converting such a double
 * to int would be undefined behaviour.
 */
static int json_extract_int(const char* json, const char* key, int* value) {
    double parsed;

    if (value == NULL || !json_extract_double(json, key, &parsed)) {
        return 0;
    }

    /* Reject values that do not fit before the narrowing cast. */
    if (parsed < (double)INT_MIN || parsed > (double)INT_MAX) {
        return 0;
    }

    *value = (int)parsed;
    return 1;
}
|
||||||
|
|
||||||
|
/*
 * 4. Check whether a JSON message is a TPV (position report) packet.
 *
 * Returns 1 when the value of "class" is a quoted string whose content is
 * exactly TPV, 0 otherwise (other classes seen from gpsd include SKY and
 * DEVICES). A value that reads `"TPV` and then hits the end of the string
 * without a closing quote is also accepted.
 */
static int is_tpv_class(const char* json) {
    static const char wanted[] = "TPV";
    const size_t wanted_len = sizeof(wanted) - 1U;
    const char* quoted = find_json_value(json, "class");
    const char* after;

    if (quoted == NULL || *quoted != '"') {
        return 0;
    }

    /* Compare the text right after the opening quote with the class name. */
    if (strncmp(quoted + 1, wanted, wanted_len) != 0) {
        return 0;
    }

    /* The name must end here: closing quote, or end of input. */
    after = quoted + 1 + wanted_len;
    return (*after == '"' || *after == '\0');
}
|
||||||
|
|
||||||
|
// =================================================================
// Background thread: connects to gpsd and keeps the shared sample updated
// =================================================================

/*
 * Resolve gpsd_host on port 2947 and connect to the first address that
 * accepts a TCP connection (IPv4 or IPv6).
 *
 * Returns the connected socket, or -1 after logging the failure to stderr.
 */
static int gps_connect_to_gpsd(const char* gpsd_host) {
    struct addrinfo hints;
    struct addrinfo *res = NULL;
    struct addrinfo *rp = NULL;
    int sockfd = -1;
    int s;

    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC; /* accept IPv4 and IPv6 */
    hints.ai_socktype = SOCK_STREAM;

    s = getaddrinfo(gpsd_host, "2947", &hints, &res);
    if (s != 0) {
        fprintf(stderr, "GPS线程: 解析 gpsd 地址失败 %s:2947: %s\n", gpsd_host, gai_strerror(s));
        return -1;
    }

    /* Try every resolved address until one connects. */
    for (rp = res; rp != NULL; rp = rp->ai_next) {
        sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if (sockfd == -1) {
            continue;
        }
        if (connect(sockfd, rp->ai_addr, rp->ai_addrlen) != -1) {
            break;
        }
        close(sockfd);
        sockfd = -1;
    }
    freeaddrinfo(res);

    if (sockfd < 0) {
        fprintf(stderr, "GPS线程: 无法连接到 %s:2947,1 秒后重试\n", gpsd_host);
    }
    return sockfd;
}

/*
 * Handle one complete, NUL-terminated JSON message from gpsd.
 *
 * Only TPV messages with mode >= 2 (1 = no fix, 2 = 2D, 3 = 3D) that carry
 * both "lat" and "lon" update the shared sample. Anything else is ignored,
 * so the last valid position is kept.
 */
static void gps_handle_json_line(const char* line) {
    gps_video_sample_t sample;
    double lat = 0.0;
    double lon = 0.0;
    int mode = 0;

    if (!is_tpv_class(line)) {
        return;
    }
    if (!json_extract_int(line, "mode", &mode) || mode < 2) {
        return;
    }
    if (!json_extract_double(line, "lat", &lat) || !json_extract_double(line, "lon", &lon)) {
        return;
    }
    if (normalize_gps(lat, lon, &sample) == 0) {
        store_gps(sample.latitude, sample.longitude);
    }
}

/*
 * Thread entry point. `arg` is a const char* gpsd host name; NULL or an empty
 * string selects 127.0.0.1. The string is read on every reconnect, so it must
 * stay valid for as long as the thread runs.
 *
 * While g_running is set the thread connects to gpsd, enables the JSON watch
 * stream, and parses newline-terminated messages. On any connection failure
 * it waits about one second and reconnects. Always returns NULL.
 */
void* gps_update_thread(void* arg) {
    const char* host = (const char*)arg;
    const char* gpsd_host = (host != NULL && host[0] != '\0') ? host : "127.0.0.1";
    const char* watch_cmd = "?WATCH={\"enable\":true,\"json\":true};\n";

    while (g_running) {
        char buffer[4096];
        size_t offset = 0;
        int skipping_line = 0; /* set while discarding an over-long message */
        int sockfd;

        /* 1. Resolve the address and connect to gpsd (port 2947). */
        sockfd = gps_connect_to_gpsd(gpsd_host);
        if (sockfd < 0) {
            gps_sleep_before_retry();
            continue;
        }

        printf("GPS线程: 已连接到 gpsd %s\n", gpsd_host);

        /* 2. Send the WATCH command to start the JSON stream. */
        if (send(sockfd, watch_cmd, strlen(watch_cmd), 0) < 0) {
            perror("GPS线程: 发送 WATCH 命令失败");
            close(sockfd);
            gps_sleep_before_retry();
            continue;
        }

        /*
         * 3. Read loop. Messages are terminated by '\n' and may be split
         * across or packed into recv() calls, so data is accumulated in
         * buffer and consumed one line at a time.
         */
        while (g_running) {
            ssize_t len = recv(sockfd, buffer + offset, sizeof(buffer) - 1 - offset, 0);
            char* start;
            char* end;
            size_t remaining;

            if (len < 0 && errno == EINTR) {
                continue; /* interrupted by a signal, not a disconnect */
            }
            if (len <= 0) {
                break;
            }

            offset += (size_t) len;
            buffer[offset] = '\0';

            start = buffer;
            while ((end = memchr(start, '\n', (size_t) ((buffer + offset) - start))) != NULL) {
                *end = '\0'; /* terminate this message in place */

                if (skipping_line) {
                    /* This is the tail of a message whose head was dropped. */
                    skipping_line = 0;
                } else {
                    gps_handle_json_line(start);
                }

                start = end + 1;
            }

            /* Keep any incomplete trailing message for the next recv(). */
            remaining = (size_t) ((buffer + offset) - start);
            if (remaining >= sizeof(buffer) - 1) {
                /*
                 * The buffer is full and holds no newline. Without this
                 * branch the next recv() would be asked for 0 bytes and its
                 * 0 return would be mistaken for a disconnect. Drop the
                 * partial message and skip the rest of it instead.
                 * NOTE(review): some gpsd reports may be longer than this
                 * buffer - confirm whether 4096 bytes is enough for TPV.
                 */
                offset = 0;
                skipping_line = 1;
            } else {
                if (remaining > 0 && start != buffer) {
                    memmove(buffer, start, remaining);
                }
                offset = remaining;
            }
        }

        close(sockfd);
        if (g_running) {
            fprintf(stderr, "GPS线程: 连接断开,1 秒后重连...\n");
            gps_sleep_before_retry();
        }
    }

    return NULL;
}
|
||||||
|
|
||||||
|
// =================================================================
|
||||||
|
// Public interface implementation
|
||||||
|
// =================================================================
|
||||||
|
/*
 * Return a copy of the most recently stored position. The copy is taken
 * under the module mutex by load_gps().
 */
gps_video_sample_t get_latest_gps_for_video(void) {
    gps_video_sample_t latest = load_gps();

    return latest;
}
|
||||||
|
|
||||||
|
int gps_buffer_init(const char* host) {
|
||||||
|
if (g_running) return 0;
|
||||||
|
|
||||||
|
g_running = 1;
|
||||||
|
clear_gps();
|
||||||
|
pthread_attr_t attr;
|
||||||
|
pthread_attr_init(&attr);
|
||||||
|
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
|
||||||
|
// 创建后台线程
|
||||||
|
if (pthread_create(&g_gps_thread, &attr, gps_update_thread, (void*)host) != 0) {
|
||||||
|
g_running = 0;
|
||||||
|
pthread_attr_destroy(&attr); // 清理属性
|
||||||
|
perror("无法创建 GPS 线程");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
pthread_attr_destroy(&attr); // 清理属性
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gps_buffer_cleanup(void) {
|
||||||
|
g_running = 0;
|
||||||
|
// 等待线程结束
|
||||||
|
|
||||||
|
usleep(10000); // 等待 100ms 让后台线程有机会处理退出标志
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Build example: gcc main.c video_pipeline_run.c gps_buffer.c -lpthread -lm -o my_app (make sure the pthread and m (math) libraries are linked)
|
||||||
@@ -18,7 +18,7 @@ kcp_packet_debug_logger_t *kcp_packet_debug_open_jsonl(const char *path) {
|
|||||||
fclose(file);
|
fclose(file);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
omni_file_logger_init(&logger->file_logger, file);
|
omni_file_logger_init_path(&logger->file_logger, file, path, 0);
|
||||||
logger->enabled = 1;
|
logger->enabled = 1;
|
||||||
return logger;
|
return logger;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ kcp_session_stats_logger_t *kcp_session_stats_open_jsonl(const char *path) {
|
|||||||
fclose(file);
|
fclose(file);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
omni_file_logger_init(&logger->file_logger, file);
|
omni_file_logger_init_path(&logger->file_logger, file, path, 0);
|
||||||
logger->enabled = 1;
|
logger->enabled = 1;
|
||||||
return logger;
|
return logger;
|
||||||
}
|
}
|
||||||
@@ -156,10 +156,18 @@ int kcp_session_stats_log(kcp_session_stats_logger_t *logger, const kcp_session_
|
|||||||
kcp_session_stats_appendf(&line, &line_len, ",\"srtt_ms\":%d", record->srtt_ms) != 0) {
|
kcp_session_stats_appendf(&line, &line_len, ",\"srtt_ms\":%d", record->srtt_ms) != 0) {
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
if (record->has_min_srtt_ms &&
|
||||||
|
kcp_session_stats_appendf(&line, &line_len, ",\"min_srtt_ms\":%d", record->min_srtt_ms) != 0) {
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
if (record->has_srttvar_ms &&
|
if (record->has_srttvar_ms &&
|
||||||
kcp_session_stats_appendf(&line, &line_len, ",\"srttvar_ms\":%d", record->srttvar_ms) != 0) {
|
kcp_session_stats_appendf(&line, &line_len, ",\"srttvar_ms\":%d", record->srttvar_ms) != 0) {
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
if (record->has_last_feedback_age_ms &&
|
||||||
|
kcp_session_stats_appendf(&line, &line_len, ",\"last_feedback_age_ms\":%u", record->last_feedback_age_ms) != 0) {
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
if (record->has_snd_wnd &&
|
if (record->has_snd_wnd &&
|
||||||
kcp_session_stats_appendf(&line, &line_len, ",\"snd_wnd\":%u", record->snd_wnd) != 0) {
|
kcp_session_stats_appendf(&line, &line_len, ",\"snd_wnd\":%u", record->snd_wnd) != 0) {
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ latency_logger_t *latencylog_open_jsonl(const char *path) {
|
|||||||
fclose(file);
|
fclose(file);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
omni_file_logger_init(&logger->file_logger, file);
|
omni_file_logger_init_path(&logger->file_logger, file, path, 0);
|
||||||
logger->enabled = 1;
|
logger->enabled = 1;
|
||||||
return logger;
|
return logger;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -544,9 +544,217 @@ const char *omni_path_base_name(const char *path) {
|
|||||||
return slash == NULL ? path : slash + 1;
|
return slash == NULL ? path : slash + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Current CLOCK_MONOTONIC reading, converted to whole milliseconds. */
static uint64_t omni_now_monotonic_ms64(void) {
    struct timespec now;
    uint64_t whole_ms;
    uint64_t fraction_ms;

    clock_gettime(CLOCK_MONOTONIC, &now);
    whole_ms = (uint64_t) now.tv_sec * 1000ULL;
    fraction_ms = (uint64_t) (now.tv_nsec / 1000000L);
    return whole_ms + fraction_ms;
}
|
||||||
|
|
||||||
|
/*
 * Read environment variable `name` as a strictly positive decimal int.
 *
 * Returns default_value when the variable is unset or empty, is not a plain
 * base-10 integer, is <= 0, or does not fit in an int (values above INT_MAX
 * used to be silently truncated by the cast). The caller's errno is
 * preserved.
 */
static int omni_positive_int_env(const char *name, int default_value) {
    const char *raw = getenv(name);
    char *endptr = NULL;
    long parsed;
    int saved_errno;
    int out_of_range;

    if (raw == NULL || raw[0] == '\0') {
        return default_value;
    }

    /* strtol() reports overflow only through errno. */
    saved_errno = errno;
    errno = 0;
    parsed = strtol(raw, &endptr, 10);
    out_of_range = (errno == ERANGE);
    errno = saved_errno;

    if (out_of_range || endptr == raw || *endptr != '\0') {
        return default_value;
    }
    if (parsed <= 0 || parsed > (long) INT_MAX) {
        return default_value;
    }
    return (int) parsed;
}
|
||||||
|
|
||||||
|
/*
 * Read environment variable `name` as a strictly positive decimal size.
 *
 * Returns default_value when the variable is unset or empty, is not a plain
 * base-10 integer, is zero, carries a minus sign, or does not fit in a
 * size_t. The caller's errno is preserved.
 */
static size_t omni_positive_size_env(const char *name, size_t default_value) {
    const char *raw = getenv(name);
    char *endptr = NULL;
    unsigned long long parsed;
    int saved_errno;
    int out_of_range;

    if (raw == NULL || raw[0] == '\0') {
        return default_value;
    }

    /*
     * strtoull() accepts a leading '-' and returns the negated value as an
     * unsigned number, so "-1" would otherwise become a huge size.
     */
    if (strchr(raw, '-') != NULL) {
        return default_value;
    }

    saved_errno = errno;
    errno = 0;
    parsed = strtoull(raw, &endptr, 10);
    out_of_range = (errno == ERANGE);
    errno = saved_errno;

    if (out_of_range || endptr == raw || *endptr != '\0') {
        return default_value;
    }
    if (parsed == 0ULL || parsed > (unsigned long long) SIZE_MAX) {
        return default_value;
    }
    return (size_t) parsed;
}
|
||||||
|
|
||||||
|
/*
 * Flush the logger's stream and reset its buffering counters.
 * The _locked suffix suggests the caller already holds logger->mutex; this
 * function does not take it.
 *
 * On success buffered_bytes becomes 0 and last_flush_monotonic_ms becomes
 * now_ms. Returns 0 on success; -1 with errno = EINVAL when logger or its
 * file is NULL, or -1 when fflush() fails.
 */
static int omni_file_logger_flush_locked(omni_file_logger_t *logger, uint64_t now_ms) {
    const int usable = (logger != NULL && logger->file != NULL);

    if (!usable) {
        errno = EINVAL;
        return -1;
    }

    if (fflush(logger->file) == 0) {
        logger->buffered_bytes = 0U;
        logger->last_flush_monotonic_ms = now_ms;
        return 0;
    }
    return -1;
}
|
||||||
|
|
||||||
|
/*
 * Write "<path>.<suffix>" into buffer.
 *
 * Returns 0 on success. Returns -1 with errno = EINVAL when buffer is NULL,
 * buffer_len is 0, or path is NULL/empty; and -1 with errno = ENAMETOOLONG
 * when fewer than 16 bytes would be left after the path for the suffix and
 * terminator.
 */
static int omni_build_rotated_path(char *buffer, size_t buffer_len, const char *path, int suffix) {
    const size_t suffix_reserve = 16U;
    int needed;

    if (buffer == NULL || buffer_len == 0U || path == NULL || path[0] == '\0') {
        errno = EINVAL;
        return -1;
    }

    /* Refuse up front when the reserved room for ".<n>" does not fit. */
    if (strlen(path) + suffix_reserve >= buffer_len) {
        errno = ENAMETOOLONG;
        return -1;
    }

    needed = snprintf(buffer, buffer_len, "%s.%d", path, suffix);
    if (needed < 0 || (size_t) needed >= buffer_len) {
        errno = ENAMETOOLONG;
        return -1;
    }
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Open logger->path in binary append mode and install it as the logger's
 * stream.
 *
 * On success current_bytes is set to the file's size as reported by stat()
 * (0 when stat() fails), buffered_bytes is cleared and the last-flush
 * timestamp is set to the current monotonic time. Returns 0 on success; -1
 * with errno = EINVAL when logger is NULL or has no path, or -1 when fopen()
 * fails (the logger is then left unchanged).
 */
static int omni_file_logger_reopen_append_locked(omni_file_logger_t *logger) {
    struct stat info;
    FILE *stream;

    if (logger == NULL || logger->path[0] == '\0') {
        errno = EINVAL;
        return -1;
    }

    stream = fopen(logger->path, "ab");
    if (stream == NULL) {
        return -1;
    }
    logger->file = stream;

    /* Pick up the size of whatever is already in the file. */
    logger->current_bytes = (stat(logger->path, &info) == 0) ? (size_t) info.st_size : 0U;
    logger->buffered_bytes = 0U;
    logger->last_flush_monotonic_ms = omni_now_monotonic_ms64();
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Try to get the logger writing again after a rotation step failed.
 *
 * First attempt: reopen logger->path for append. If that fails and
 * rotated_current_path is non-empty, rename that file back to logger->path
 * and try the reopen once more.
 *
 * Returns 0 when the logger has an open stream again, otherwise -1 with errno
 * set to the error from the first reopen attempt.
 */
static int omni_file_logger_recover_after_rotate_locked(omni_file_logger_t *logger, const char *rotated_current_path) {
    int first_errno;
    int have_rotated_file;

    if (omni_file_logger_reopen_append_locked(logger) == 0) {
        return 0;
    }
    first_errno = errno;

    have_rotated_file = (rotated_current_path != NULL && rotated_current_path[0] != '\0');
    if (have_rotated_file &&
        rename(rotated_current_path, logger->path) == 0 &&
        omni_file_logger_reopen_append_locked(logger) == 0) {
        return 0;
    }

    errno = first_errno;
    return -1;
}
|
||||||
|
|
||||||
|
/*
 * Rotate the log file: path.(N-1) -> path.N, ..., path.1 -> path.2,
 * path -> path.1, then reopen a fresh path. N is logger->max_files; an
 * existing path.N is unlinked first.
 *
 * Returns 0 when rotation succeeded, or when rotation is not configured
 * (NULL logger, empty path, max_bytes == 0 or max_files <= 0). Returns -1 on
 * failure with errno describing the step that failed. If a stream had been
 * open when rotation started, a failure also triggers a recovery attempt so
 * the logger can keep writing; -1 is still returned in that case.
 */
static int omni_file_logger_rotate_locked(omni_file_logger_t *logger) {
    char moved_live_path[PATH_MAX];
    char src_path[PATH_MAX];
    char dst_path[PATH_MAX];
    int failure_errno = 0;
    int had_open_file = 0;
    int recovered = 0;
    int reported_errno;
    int slot;

    if (logger == NULL || logger->path[0] == '\0' || logger->max_bytes == 0U || logger->max_files <= 0) {
        return 0;
    }
    moved_live_path[0] = '\0';

    /* Flush and close the live stream before touching files on disk. */
    if (logger->file != NULL) {
        int close_rc;

        if (omni_file_logger_flush_locked(logger, omni_now_monotonic_ms64()) != 0) {
            return -1;
        }
        had_open_file = 1;

        close_rc = fclose(logger->file);
        logger->file = NULL; /* the stream is gone whether or not fclose succeeded */
        if (close_rc != 0) {
            failure_errno = errno;
            goto recover;
        }
    }

    /* Drop the oldest generation to make room. */
    if (omni_build_rotated_path(src_path, sizeof(src_path), logger->path, logger->max_files) != 0) {
        failure_errno = errno;
        goto recover;
    }
    unlink(src_path);

    /* Shift the remaining generations up by one, oldest first. */
    slot = logger->max_files - 1;
    while (slot >= 1) {
        if (omni_build_rotated_path(src_path, sizeof(src_path), logger->path, slot) != 0 ||
            omni_build_rotated_path(dst_path, sizeof(dst_path), logger->path, slot + 1) != 0) {
            failure_errno = errno;
            goto recover;
        }
        /* A missing generation is not an error. */
        if (rename(src_path, dst_path) != 0 && errno != ENOENT) {
            failure_errno = errno;
            goto recover;
        }
        --slot;
    }

    /* Move the live file to generation 1. */
    if (omni_build_rotated_path(dst_path, sizeof(dst_path), logger->path, 1) != 0) {
        failure_errno = errno;
        goto recover;
    }
    if (rename(logger->path, dst_path) != 0 && errno != ENOENT) {
        failure_errno = errno;
        goto recover;
    }
    /* Remember where the live file went so recovery can move it back. */
    snprintf(moved_live_path, sizeof(moved_live_path), "%s", dst_path);

    if (omni_file_logger_reopen_append_locked(logger) != 0) {
        failure_errno = errno;
        goto recover;
    }
    return 0;

recover:
    /* Capture the error to report before recovery can overwrite errno. */
    reported_errno = failure_errno != 0 ? failure_errno : errno;
    if (had_open_file) {
        recovered = (omni_file_logger_recover_after_rotate_locked(logger, moved_live_path) == 0);
    }
    if (recovered) {
        errno = reported_errno;
    } else if (failure_errno != 0) {
        errno = failure_errno;
    }
    return -1;
}
|
||||||
|
|
||||||
void omni_file_logger_init(omni_file_logger_t *logger, FILE *file) {
|
void omni_file_logger_init(omni_file_logger_t *logger, FILE *file) {
|
||||||
|
memset(logger, 0, sizeof(*logger));
|
||||||
logger->file = file;
|
logger->file = file;
|
||||||
pthread_mutex_init(&logger->mutex, NULL);
|
pthread_mutex_init(&logger->mutex, NULL);
|
||||||
|
logger->flush_bytes = 1U;
|
||||||
|
logger->flush_interval_ms = 0;
|
||||||
|
logger->immediate_flush = 1;
|
||||||
|
logger->last_flush_monotonic_ms = omni_now_monotonic_ms64();
|
||||||
|
}
|
||||||
|
|
||||||
|
void omni_file_logger_init_path(omni_file_logger_t *logger, FILE *file, const char *path, int immediate_flush) {
|
||||||
|
struct stat st;
|
||||||
|
|
||||||
|
omni_file_logger_init(logger, file);
|
||||||
|
if (path != NULL && path[0] != '\0') {
|
||||||
|
snprintf(logger->path, sizeof(logger->path), "%s", path);
|
||||||
|
if (stat(path, &st) == 0) {
|
||||||
|
logger->current_bytes = (size_t) st.st_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
logger->flush_bytes = omni_positive_size_env("BLITZ_JSONL_FLUSH_BYTES", 262144U);
|
||||||
|
logger->flush_interval_ms = omni_positive_int_env("BLITZ_JSONL_FLUSH_INTERVAL_MS", 1000);
|
||||||
|
logger->max_bytes = omni_positive_size_env("BLITZ_JSONL_ROTATE_BYTES", 134217728U);
|
||||||
|
logger->max_files = omni_positive_int_env("BLITZ_JSONL_ROTATE_FILES", 8);
|
||||||
|
logger->immediate_flush = immediate_flush != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void omni_file_logger_destroy(omni_file_logger_t *logger) {
|
void omni_file_logger_destroy(omni_file_logger_t *logger) {
|
||||||
@@ -555,13 +763,32 @@ void omni_file_logger_destroy(omni_file_logger_t *logger) {
|
|||||||
|
|
||||||
int omni_file_logger_write_line(omni_file_logger_t *logger, const char *line) {
|
int omni_file_logger_write_line(omni_file_logger_t *logger, const char *line) {
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
|
size_t line_len;
|
||||||
|
uint64_t now_ms;
|
||||||
if (logger == NULL || logger->file == NULL || line == NULL) {
|
if (logger == NULL || logger->file == NULL || line == NULL) {
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
line_len = strlen(line) + 1U;
|
||||||
|
now_ms = omni_now_monotonic_ms64();
|
||||||
pthread_mutex_lock(&logger->mutex);
|
pthread_mutex_lock(&logger->mutex);
|
||||||
if (fputs(line, logger->file) == EOF || fputc('\n', logger->file) == EOF || fflush(logger->file) != 0) {
|
if (fputs(line, logger->file) == EOF || fputc('\n', logger->file) == EOF) {
|
||||||
rc = -1;
|
rc = -1;
|
||||||
|
} else {
|
||||||
|
logger->current_bytes += line_len;
|
||||||
|
logger->buffered_bytes += line_len;
|
||||||
|
if (logger->immediate_flush ||
|
||||||
|
logger->buffered_bytes >= logger->flush_bytes ||
|
||||||
|
(logger->flush_interval_ms > 0 && now_ms - logger->last_flush_monotonic_ms >= (uint64_t) logger->flush_interval_ms)) {
|
||||||
|
if (omni_file_logger_flush_locked(logger, now_ms) != 0) {
|
||||||
|
rc = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (rc == 0 && logger->max_bytes > 0U && logger->current_bytes >= logger->max_bytes) {
|
||||||
|
if (omni_file_logger_rotate_locked(logger) != 0) {
|
||||||
|
rc = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pthread_mutex_unlock(&logger->mutex);
|
pthread_mutex_unlock(&logger->mutex);
|
||||||
return rc;
|
return rc;
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ struct kcp_client {
|
|||||||
pthread_mutex_t state_mu;
|
pthread_mutex_t state_mu;
|
||||||
uint64_t next_message_id;
|
uint64_t next_message_id;
|
||||||
int registered;
|
int registered;
|
||||||
|
uint32_t last_server_activity_ms;
|
||||||
char last_server_error[256];
|
char last_server_error[256];
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -42,6 +43,15 @@ static void kcp_client_set_registered(kcp_client_t *client, int registered) {
|
|||||||
pthread_mutex_unlock(&client->state_mu);
|
pthread_mutex_unlock(&client->state_mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void kcp_client_touch_server_activity(kcp_client_t *client) {
|
||||||
|
if (client == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pthread_mutex_lock(&client->state_mu);
|
||||||
|
client->last_server_activity_ms = omni_now_millis32();
|
||||||
|
pthread_mutex_unlock(&client->state_mu);
|
||||||
|
}
|
||||||
|
|
||||||
static void kcp_client_set_last_server_error(kcp_client_t *client, const char *message) {
|
static void kcp_client_set_last_server_error(kcp_client_t *client, const char *message) {
|
||||||
if (client == NULL) {
|
if (client == NULL) {
|
||||||
return;
|
return;
|
||||||
@@ -55,6 +65,16 @@ static void kcp_client_clear_last_server_error(kcp_client_t *client) {
|
|||||||
kcp_client_set_last_server_error(client, "");
|
kcp_client_set_last_server_error(client, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int kcp_client_server_error_invalidates_registration(const char *message) {
|
||||||
|
if (message == NULL || message[0] == '\0') {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return strstr(message, "not registered") != NULL
|
||||||
|
|| strstr(message, "first message must be register") != NULL
|
||||||
|
|| strstr(message, "peer replaced") != NULL
|
||||||
|
|| strstr(message, "timed out waiting for server_register_ok") != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
static int kcp_client_is_registered(kcp_client_t *client) {
|
static int kcp_client_is_registered(kcp_client_t *client) {
|
||||||
int registered;
|
int registered;
|
||||||
|
|
||||||
@@ -158,6 +178,7 @@ static int kcp_client_handle_reserved_server_message(kcp_client_t *client, const
|
|||||||
if (msg->type != MSG_TYPE_TEXT || strcmp(msg->from, SERVER_PEER_ID) != 0) {
|
if (msg->type != MSG_TYPE_TEXT || strcmp(msg->from, SERVER_PEER_ID) != 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
kcp_client_touch_server_activity(client);
|
||||||
if (kcp_client_text_body_equals(msg, KCP_CLIENT_CTRL_REGISTER_OK)) {
|
if (kcp_client_text_body_equals(msg, KCP_CLIENT_CTRL_REGISTER_OK)) {
|
||||||
kcp_client_set_registered(client, 1);
|
kcp_client_set_registered(client, 1);
|
||||||
kcp_client_clear_last_server_error(client);
|
kcp_client_clear_last_server_error(client);
|
||||||
@@ -239,6 +260,7 @@ static int kcp_client_wait_for_register_ok(kcp_client_t *client) {
|
|||||||
char error_text[256];
|
char error_text[256];
|
||||||
|
|
||||||
kcp_client_copy_server_error_body(&msg, error_text, sizeof(error_text));
|
kcp_client_copy_server_error_body(&msg, error_text, sizeof(error_text));
|
||||||
|
kcp_client_touch_server_activity(client);
|
||||||
kcp_client_set_registered(client, 0);
|
kcp_client_set_registered(client, 0);
|
||||||
kcp_client_set_last_server_error(client, error_text);
|
kcp_client_set_last_server_error(client, error_text);
|
||||||
protocol_message_clear(&msg);
|
protocol_message_clear(&msg);
|
||||||
@@ -290,6 +312,9 @@ static int kcp_client_receive_business_timed(kcp_client_t *client, message_t *ou
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (strcmp(out_msg->from, SERVER_PEER_ID) == 0) {
|
||||||
|
kcp_client_touch_server_activity(client);
|
||||||
|
}
|
||||||
reserved_rc = kcp_client_handle_reserved_server_message(client, out_msg);
|
reserved_rc = kcp_client_handle_reserved_server_message(client, out_msg);
|
||||||
if (reserved_rc < 0) {
|
if (reserved_rc < 0) {
|
||||||
protocol_message_clear(out_msg);
|
protocol_message_clear(out_msg);
|
||||||
@@ -304,6 +329,9 @@ static int kcp_client_receive_business_timed(kcp_client_t *client, message_t *ou
|
|||||||
|
|
||||||
kcp_client_copy_server_error_body(out_msg, error_text, sizeof(error_text));
|
kcp_client_copy_server_error_body(out_msg, error_text, sizeof(error_text));
|
||||||
kcp_client_set_last_server_error(client, error_text);
|
kcp_client_set_last_server_error(client, error_text);
|
||||||
|
if (kcp_client_server_error_invalidates_registration(error_text)) {
|
||||||
|
kcp_client_set_registered(client, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
latencylog_log_message_event(client->logger, OMNI_NODE_ROLE_PEER, client->id, EVENT_B_APP_RECV, out_msg);
|
latencylog_log_message_event(client->logger, OMNI_NODE_ROLE_PEER, client->id, EVENT_B_APP_RECV, out_msg);
|
||||||
return 0;
|
return 0;
|
||||||
@@ -399,6 +427,7 @@ kcp_client_t *kcp_client_dial_with_options(const char *server_addr, const char *
|
|||||||
snprintf(client->id, sizeof(client->id), "%s", peer_id);
|
snprintf(client->id, sizeof(client->id), "%s", peer_id);
|
||||||
snprintf(client->server_addr, sizeof(client->server_addr), "%s", server_addr == NULL ? "" : server_addr);
|
snprintf(client->server_addr, sizeof(client->server_addr), "%s", server_addr == NULL ? "" : server_addr);
|
||||||
pthread_mutex_init(&client->state_mu, NULL);
|
pthread_mutex_init(&client->state_mu, NULL);
|
||||||
|
client->last_server_activity_ms = omni_now_millis32();
|
||||||
client->logger = logger;
|
client->logger = logger;
|
||||||
client->conn = kcp_conn_dial_with_options(actual_dial_addr, bind_ip, bind_device, options, packet_logger, logger, OMNI_NODE_ROLE_PEER, peer_id, stats_logger, stats_interval_ms);
|
client->conn = kcp_conn_dial_with_options(actual_dial_addr, bind_ip, bind_device, options, packet_logger, logger, OMNI_NODE_ROLE_PEER, peer_id, stats_logger, stats_interval_ms);
|
||||||
if (client->conn == NULL) {
|
if (client->conn == NULL) {
|
||||||
@@ -448,6 +477,16 @@ int kcp_client_send_text(kcp_client_t *client, const char *to, const char *text)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int kcp_client_send_binary(kcp_client_t *client, const char *to, const void *data, size_t data_len) {
|
int kcp_client_send_binary(kcp_client_t *client, const char *to, const void *data, size_t data_len) {
|
||||||
|
return kcp_client_send_binary_with_id(client, to, data, data_len, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
int kcp_client_send_binary_with_id(
|
||||||
|
kcp_client_t *client,
|
||||||
|
const char *to,
|
||||||
|
const void *data,
|
||||||
|
size_t data_len,
|
||||||
|
uint64_t *out_id
|
||||||
|
) {
|
||||||
message_t msg;
|
message_t msg;
|
||||||
uint64_t id;
|
uint64_t id;
|
||||||
|
|
||||||
@@ -479,6 +518,9 @@ int kcp_client_send_binary(kcp_client_t *client, const char *to, const void *dat
|
|||||||
protocol_message_clear(&msg);
|
protocol_message_clear(&msg);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
if (out_id != NULL) {
|
||||||
|
*out_id = id;
|
||||||
|
}
|
||||||
protocol_message_clear(&msg);
|
protocol_message_clear(&msg);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -596,6 +638,9 @@ void kcp_client_state_snapshot(kcp_client_t *client, kcp_client_state_t *out_sta
|
|||||||
}
|
}
|
||||||
pthread_mutex_lock(&client->state_mu);
|
pthread_mutex_lock(&client->state_mu);
|
||||||
out_state->registered = client->registered;
|
out_state->registered = client->registered;
|
||||||
|
out_state->server_idle_ms = client->last_server_activity_ms == 0
|
||||||
|
? 0
|
||||||
|
: (omni_now_millis32() - client->last_server_activity_ms);
|
||||||
snprintf(out_state->last_server_error, sizeof(out_state->last_server_error), "%s", client->last_server_error);
|
snprintf(out_state->last_server_error, sizeof(out_state->last_server_error), "%s", client->last_server_error);
|
||||||
pthread_mutex_unlock(&client->state_mu);
|
pthread_mutex_unlock(&client->state_mu);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -113,11 +113,17 @@ static int kcp_hub_peer_is_telemetry(const char *peer_id) {
|
|||||||
return kcp_hub_peer_id_has_suffix(peer_id, "-telemetry");
|
return kcp_hub_peer_id_has_suffix(peer_id, "-telemetry");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int kcp_hub_peer_is_video_receiver(const char *peer_id) {
|
||||||
|
return peer_id != NULL && strcmp(peer_id, "peer-a-video") == 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int kcp_hub_peer_uses_server_lease(const char *peer_id) {
|
static int kcp_hub_peer_uses_server_lease(const char *peer_id) {
|
||||||
if (peer_id == NULL || peer_id[0] == '\0') {
|
if (peer_id == NULL || peer_id[0] == '\0') {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return kcp_hub_peer_id_has_suffix(peer_id, "-ctrl") || kcp_hub_peer_is_telemetry(peer_id);
|
return kcp_hub_peer_id_has_suffix(peer_id, "-ctrl")
|
||||||
|
|| kcp_hub_peer_is_telemetry(peer_id)
|
||||||
|
|| kcp_hub_peer_is_video_receiver(peer_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *kcp_hub_peer_node_id(const char *peer_id) {
|
static const char *kcp_hub_peer_node_id(const char *peer_id) {
|
||||||
@@ -222,7 +228,9 @@ static int kcp_hub_add_runtime_stats_json(cJSON *object, const kcp_runtime_stats
|
|||||||
cJSON_AddNumberToObject(object, "conv", (double) stats->conv) == NULL ||
|
cJSON_AddNumberToObject(object, "conv", (double) stats->conv) == NULL ||
|
||||||
cJSON_AddNumberToObject(object, "rto_ms", (double) stats->rto_ms) == NULL ||
|
cJSON_AddNumberToObject(object, "rto_ms", (double) stats->rto_ms) == NULL ||
|
||||||
cJSON_AddNumberToObject(object, "srtt_ms", (double) stats->srtt_ms) == NULL ||
|
cJSON_AddNumberToObject(object, "srtt_ms", (double) stats->srtt_ms) == NULL ||
|
||||||
|
cJSON_AddNumberToObject(object, "min_srtt_ms", (double) stats->min_srtt_ms) == NULL ||
|
||||||
cJSON_AddNumberToObject(object, "srttvar_ms", (double) stats->srttvar_ms) == NULL ||
|
cJSON_AddNumberToObject(object, "srttvar_ms", (double) stats->srttvar_ms) == NULL ||
|
||||||
|
cJSON_AddNumberToObject(object, "last_feedback_age_ms", (double) stats->last_feedback_age_ms) == NULL ||
|
||||||
cJSON_AddNumberToObject(object, "snd_wnd", (double) stats->snd_wnd) == NULL ||
|
cJSON_AddNumberToObject(object, "snd_wnd", (double) stats->snd_wnd) == NULL ||
|
||||||
cJSON_AddNumberToObject(object, "rmt_wnd", (double) stats->rmt_wnd) == NULL ||
|
cJSON_AddNumberToObject(object, "rmt_wnd", (double) stats->rmt_wnd) == NULL ||
|
||||||
cJSON_AddNumberToObject(object, "inflight", (double) stats->inflight) == NULL ||
|
cJSON_AddNumberToObject(object, "inflight", (double) stats->inflight) == NULL ||
|
||||||
|
|||||||
@@ -1,9 +1,14 @@
|
|||||||
#include "server_udp_relay.h"
|
#include "server_udp_relay.h"
|
||||||
|
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
|
#include <stdatomic.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#define UDP_RELAY_BUF_SIZE (64U * 1024U)
|
#define UDP_RELAY_BUF_SIZE (64U * 1024U)
|
||||||
|
#define UDP_RELAY_ROUTE_TIMEOUT_MS 30000U
|
||||||
|
#define UDP_RELAY_DEFAULT_PACKET_LOG_SAMPLE_EVERY 200U
|
||||||
|
|
||||||
struct udp_relay {
|
struct udp_relay {
|
||||||
int downstream_fd;
|
int downstream_fd;
|
||||||
@@ -15,8 +20,12 @@ struct udp_relay {
|
|||||||
struct sockaddr_storage client_addr;
|
struct sockaddr_storage client_addr;
|
||||||
socklen_t client_addr_len;
|
socklen_t client_addr_len;
|
||||||
int has_client;
|
int has_client;
|
||||||
|
uint32_t client_last_seen_ms;
|
||||||
|
struct udp_relay_route *routes;
|
||||||
pthread_mutex_t lock;
|
pthread_mutex_t lock;
|
||||||
pthread_mutex_t log_mu;
|
pthread_mutex_t log_mu;
|
||||||
|
unsigned int packet_log_sample_every;
|
||||||
|
atomic_ullong packet_log_counter;
|
||||||
pthread_mutex_t state_mu;
|
pthread_mutex_t state_mu;
|
||||||
pthread_cond_t state_cond;
|
pthread_cond_t state_cond;
|
||||||
pthread_t downstream_thread;
|
pthread_t downstream_thread;
|
||||||
@@ -29,6 +38,60 @@ struct udp_relay {
|
|||||||
int closed;
|
int closed;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef struct udp_relay_route {
|
||||||
|
struct udp_relay_route *next;
|
||||||
|
uint32_t conv;
|
||||||
|
struct sockaddr_storage client_addr;
|
||||||
|
socklen_t client_addr_len;
|
||||||
|
uint32_t last_seen_ms;
|
||||||
|
} udp_relay_route_t;
|
||||||
|
|
||||||
|
static uint32_t udp_relay_now_ms(void) {
|
||||||
|
return omni_now_millis32();
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t udp_relay_elapsed_ms(uint32_t now_ms, uint32_t then_ms) {
|
||||||
|
return now_ms - then_ms;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int udp_relay_packet_log_sample_every(void) {
|
||||||
|
const char *raw = getenv("OMNI_RELAY_PACKET_LOG_SAMPLE_EVERY");
|
||||||
|
unsigned long parsed;
|
||||||
|
char *endptr = NULL;
|
||||||
|
|
||||||
|
if (raw == NULL || raw[0] == '\0') {
|
||||||
|
return UDP_RELAY_DEFAULT_PACKET_LOG_SAMPLE_EVERY;
|
||||||
|
}
|
||||||
|
parsed = strtoul(raw, &endptr, 10);
|
||||||
|
if (endptr == raw || *endptr != '\0') {
|
||||||
|
return UDP_RELAY_DEFAULT_PACKET_LOG_SAMPLE_EVERY;
|
||||||
|
}
|
||||||
|
return (unsigned int) parsed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int udp_relay_event_should_always_log(const char *event_name) {
|
||||||
|
return event_name != NULL && strstr(event_name, "_drop_") != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int udp_relay_should_log_packet(udp_relay_t *relay, const char *event_name) {
|
||||||
|
unsigned long long seq;
|
||||||
|
|
||||||
|
if (relay == NULL) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (udp_relay_event_should_always_log(event_name)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (relay->packet_log_sample_every == 0U) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (relay->packet_log_sample_every == 1U) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
seq = atomic_fetch_add_explicit(&relay->packet_log_counter, 1U, memory_order_relaxed) + 1U;
|
||||||
|
return (seq % (unsigned long long) relay->packet_log_sample_every) == 0U;
|
||||||
|
}
|
||||||
|
|
||||||
static void udp_relay_parse_kcp_summary(const uint8_t *packet, size_t len, int *has_conv, uint32_t *conv, size_t *segment_count) {
|
static void udp_relay_parse_kcp_summary(const uint8_t *packet, size_t len, int *has_conv, uint32_t *conv, size_t *segment_count) {
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
@@ -80,6 +143,9 @@ static void udp_relay_print_packet(udp_relay_t *relay, const char *event_name, c
|
|||||||
if (relay == NULL) {
|
if (relay == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!udp_relay_should_log_packet(relay, event_name)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (remote_addr != NULL && remote_addr_len > 0) {
|
if (remote_addr != NULL && remote_addr_len > 0) {
|
||||||
omni_sockaddr_to_string((const struct sockaddr *) remote_addr, remote_addr_len, remote_addr_text, sizeof(remote_addr_text));
|
omni_sockaddr_to_string((const struct sockaddr *) remote_addr, remote_addr_len, remote_addr_text, sizeof(remote_addr_text));
|
||||||
@@ -136,13 +202,88 @@ static void udp_relay_record_client(udp_relay_t *relay, const struct sockaddr_st
|
|||||||
memcpy(&relay->client_addr, addr, sizeof(*addr));
|
memcpy(&relay->client_addr, addr, sizeof(*addr));
|
||||||
relay->client_addr_len = addr_len;
|
relay->client_addr_len = addr_len;
|
||||||
relay->has_client = 1;
|
relay->has_client = 1;
|
||||||
|
relay->client_last_seen_ms = udp_relay_now_ms();
|
||||||
pthread_mutex_unlock(&relay->lock);
|
pthread_mutex_unlock(&relay->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void udp_relay_prune_routes_locked(udp_relay_t *relay, uint32_t now_ms) {
|
||||||
|
udp_relay_route_t *prev = NULL;
|
||||||
|
udp_relay_route_t *route;
|
||||||
|
|
||||||
|
if (relay == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
route = relay->routes;
|
||||||
|
while (route != NULL) {
|
||||||
|
udp_relay_route_t *next = route->next;
|
||||||
|
|
||||||
|
if (udp_relay_elapsed_ms(now_ms, route->last_seen_ms) >= UDP_RELAY_ROUTE_TIMEOUT_MS) {
|
||||||
|
if (prev == NULL) {
|
||||||
|
relay->routes = next;
|
||||||
|
} else {
|
||||||
|
prev->next = next;
|
||||||
|
}
|
||||||
|
free(route);
|
||||||
|
route = next;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
prev = route;
|
||||||
|
route = next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (relay->has_client && udp_relay_elapsed_ms(now_ms, relay->client_last_seen_ms) >= UDP_RELAY_ROUTE_TIMEOUT_MS) {
|
||||||
|
relay->has_client = 0;
|
||||||
|
relay->client_addr_len = 0;
|
||||||
|
memset(&relay->client_addr, 0, sizeof(relay->client_addr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int udp_relay_record_route(udp_relay_t *relay, uint32_t conv, const struct sockaddr_storage *addr, socklen_t addr_len) {
|
||||||
|
udp_relay_route_t *route;
|
||||||
|
uint32_t now_ms;
|
||||||
|
|
||||||
|
if (relay == NULL || addr == NULL || addr_len == 0) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
now_ms = udp_relay_now_ms();
|
||||||
|
pthread_mutex_lock(&relay->lock);
|
||||||
|
udp_relay_prune_routes_locked(relay, now_ms);
|
||||||
|
for (route = relay->routes; route != NULL; route = route->next) {
|
||||||
|
if (route->conv == conv) {
|
||||||
|
memcpy(&route->client_addr, addr, sizeof(*addr));
|
||||||
|
route->client_addr_len = addr_len;
|
||||||
|
route->last_seen_ms = now_ms;
|
||||||
|
pthread_mutex_unlock(&relay->lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
route = (udp_relay_route_t *) calloc(1, sizeof(*route));
|
||||||
|
if (route == NULL) {
|
||||||
|
pthread_mutex_unlock(&relay->lock);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
route->conv = conv;
|
||||||
|
memcpy(&route->client_addr, addr, sizeof(*addr));
|
||||||
|
route->client_addr_len = addr_len;
|
||||||
|
route->last_seen_ms = now_ms;
|
||||||
|
route->next = relay->routes;
|
||||||
|
relay->routes = route;
|
||||||
|
pthread_mutex_unlock(&relay->lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int udp_relay_copy_client(udp_relay_t *relay, struct sockaddr_storage *addr, socklen_t *addr_len) {
|
static int udp_relay_copy_client(udp_relay_t *relay, struct sockaddr_storage *addr, socklen_t *addr_len) {
|
||||||
int has_client;
|
int has_client;
|
||||||
|
uint32_t now_ms;
|
||||||
|
|
||||||
|
now_ms = udp_relay_now_ms();
|
||||||
pthread_mutex_lock(&relay->lock);
|
pthread_mutex_lock(&relay->lock);
|
||||||
|
udp_relay_prune_routes_locked(relay, now_ms);
|
||||||
has_client = relay->has_client;
|
has_client = relay->has_client;
|
||||||
if (has_client) {
|
if (has_client) {
|
||||||
memcpy(addr, &relay->client_addr, sizeof(*addr));
|
memcpy(addr, &relay->client_addr, sizeof(*addr));
|
||||||
@@ -152,6 +293,45 @@ static int udp_relay_copy_client(udp_relay_t *relay, struct sockaddr_storage *ad
|
|||||||
return has_client;
|
return has_client;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int udp_relay_copy_route(udp_relay_t *relay, uint32_t conv, struct sockaddr_storage *addr, socklen_t *addr_len) {
|
||||||
|
udp_relay_route_t *route;
|
||||||
|
uint32_t now_ms;
|
||||||
|
|
||||||
|
now_ms = udp_relay_now_ms();
|
||||||
|
pthread_mutex_lock(&relay->lock);
|
||||||
|
udp_relay_prune_routes_locked(relay, now_ms);
|
||||||
|
for (route = relay->routes; route != NULL; route = route->next) {
|
||||||
|
if (route->conv == conv) {
|
||||||
|
memcpy(addr, &route->client_addr, sizeof(*addr));
|
||||||
|
*addr_len = route->client_addr_len;
|
||||||
|
pthread_mutex_unlock(&relay->lock);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(&relay->lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void udp_relay_clear_routes(udp_relay_t *relay) {
|
||||||
|
udp_relay_route_t *route;
|
||||||
|
udp_relay_route_t *next;
|
||||||
|
|
||||||
|
if (relay == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_lock(&relay->lock);
|
||||||
|
route = relay->routes;
|
||||||
|
relay->routes = NULL;
|
||||||
|
pthread_mutex_unlock(&relay->lock);
|
||||||
|
|
||||||
|
while (route != NULL) {
|
||||||
|
next = route->next;
|
||||||
|
free(route);
|
||||||
|
route = next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void *udp_relay_forward_downstream_to_upstream(void *arg) {
|
static void *udp_relay_forward_downstream_to_upstream(void *arg) {
|
||||||
udp_relay_t *relay = (udp_relay_t *) arg;
|
udp_relay_t *relay = (udp_relay_t *) arg;
|
||||||
uint8_t buffer[UDP_RELAY_BUF_SIZE];
|
uint8_t buffer[UDP_RELAY_BUF_SIZE];
|
||||||
@@ -160,6 +340,8 @@ static void *udp_relay_forward_downstream_to_upstream(void *arg) {
|
|||||||
struct sockaddr_storage source;
|
struct sockaddr_storage source;
|
||||||
socklen_t source_len = sizeof(source);
|
socklen_t source_len = sizeof(source);
|
||||||
ssize_t n = recvfrom(relay->downstream_fd, buffer, sizeof(buffer), 0, (struct sockaddr *) &source, &source_len);
|
ssize_t n = recvfrom(relay->downstream_fd, buffer, sizeof(buffer), 0, (struct sockaddr *) &source, &source_len);
|
||||||
|
int has_conv = 0;
|
||||||
|
uint32_t conv = 0;
|
||||||
|
|
||||||
if (n < 0) {
|
if (n < 0) {
|
||||||
int errnum = errno;
|
int errnum = errno;
|
||||||
@@ -175,6 +357,10 @@ static void *udp_relay_forward_downstream_to_upstream(void *arg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
udp_relay_record_client(relay, &source, source_len);
|
udp_relay_record_client(relay, &source, source_len);
|
||||||
|
udp_relay_parse_kcp_summary(buffer, (size_t) n, &has_conv, &conv, NULL);
|
||||||
|
if (has_conv) {
|
||||||
|
(void) udp_relay_record_route(relay, conv, &source, source_len);
|
||||||
|
}
|
||||||
udp_relay_print_packet(relay, "relay_downstream_rx", relay->downstream_local_addr, &source, source_len, buffer, (size_t) n);
|
udp_relay_print_packet(relay, "relay_downstream_rx", relay->downstream_local_addr, &source, source_len, buffer, (size_t) n);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (send(relay->upstream_fd, buffer, (size_t) n, 0) >= 0) {
|
if (send(relay->upstream_fd, buffer, (size_t) n, 0) >= 0) {
|
||||||
@@ -205,6 +391,8 @@ static void *udp_relay_forward_upstream_to_downstream(void *arg) {
|
|||||||
struct sockaddr_storage client_addr;
|
struct sockaddr_storage client_addr;
|
||||||
socklen_t client_addr_len = 0;
|
socklen_t client_addr_len = 0;
|
||||||
ssize_t n = recv(relay->upstream_fd, buffer, sizeof(buffer), 0);
|
ssize_t n = recv(relay->upstream_fd, buffer, sizeof(buffer), 0);
|
||||||
|
int has_conv = 0;
|
||||||
|
uint32_t conv = 0;
|
||||||
|
|
||||||
if (n < 0) {
|
if (n < 0) {
|
||||||
int errnum = errno;
|
int errnum = errno;
|
||||||
@@ -220,7 +408,12 @@ static void *udp_relay_forward_upstream_to_downstream(void *arg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
udp_relay_print_packet(relay, "relay_upstream_rx", relay->upstream_local_addr, &relay->upstream_addr, relay->upstream_addr_len, buffer, (size_t) n);
|
udp_relay_print_packet(relay, "relay_upstream_rx", relay->upstream_local_addr, &relay->upstream_addr, relay->upstream_addr_len, buffer, (size_t) n);
|
||||||
if (!udp_relay_copy_client(relay, &client_addr, &client_addr_len)) {
|
udp_relay_parse_kcp_summary(buffer, (size_t) n, &has_conv, &conv, NULL);
|
||||||
|
if (has_conv && !udp_relay_copy_route(relay, conv, &client_addr, &client_addr_len)) {
|
||||||
|
udp_relay_print_packet(relay, "relay_upstream_drop_unknown_conv", relay->upstream_local_addr, &relay->upstream_addr, relay->upstream_addr_len, buffer, (size_t) n);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!has_conv && !udp_relay_copy_client(relay, &client_addr, &client_addr_len)) {
|
||||||
udp_relay_print_packet(relay, "relay_upstream_drop_no_client", relay->upstream_local_addr, &relay->upstream_addr, relay->upstream_addr_len, buffer, (size_t) n);
|
udp_relay_print_packet(relay, "relay_upstream_drop_no_client", relay->upstream_local_addr, &relay->upstream_addr, relay->upstream_addr_len, buffer, (size_t) n);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -315,6 +508,8 @@ udp_relay_t *udp_relay_open(const char *listen_addr, const char *upstream_addr)
|
|||||||
}
|
}
|
||||||
pthread_mutex_init(&relay->lock, NULL);
|
pthread_mutex_init(&relay->lock, NULL);
|
||||||
pthread_mutex_init(&relay->log_mu, NULL);
|
pthread_mutex_init(&relay->log_mu, NULL);
|
||||||
|
relay->packet_log_sample_every = udp_relay_packet_log_sample_every();
|
||||||
|
atomic_init(&relay->packet_log_counter, 0U);
|
||||||
pthread_mutex_init(&relay->state_mu, NULL);
|
pthread_mutex_init(&relay->state_mu, NULL);
|
||||||
pthread_cond_init(&relay->state_cond, NULL);
|
pthread_cond_init(&relay->state_cond, NULL);
|
||||||
return relay;
|
return relay;
|
||||||
@@ -409,6 +604,7 @@ void udp_relay_free(udp_relay_t *relay) {
|
|||||||
}
|
}
|
||||||
udp_relay_close(relay);
|
udp_relay_close(relay);
|
||||||
udp_relay_join_threads(relay);
|
udp_relay_join_threads(relay);
|
||||||
|
udp_relay_clear_routes(relay);
|
||||||
pthread_mutex_destroy(&relay->lock);
|
pthread_mutex_destroy(&relay->lock);
|
||||||
pthread_mutex_destroy(&relay->log_mu);
|
pthread_mutex_destroy(&relay->log_mu);
|
||||||
pthread_cond_destroy(&relay->state_cond);
|
pthread_cond_destroy(&relay->state_cond);
|
||||||
|
|||||||
@@ -72,6 +72,8 @@ struct kcp_conn {
|
|||||||
uint64_t pending_in_errs;
|
uint64_t pending_in_errs;
|
||||||
uint64_t pending_kcp_in_errs;
|
uint64_t pending_kcp_in_errs;
|
||||||
protocol_frame_decoder_t decoder;
|
protocol_frame_decoder_t decoder;
|
||||||
|
int32_t min_srtt_ms;
|
||||||
|
uint32_t last_feedback_ms;
|
||||||
uint8_t scratch[KCP_RECV_CHUNK_SIZE];
|
uint8_t scratch[KCP_RECV_CHUNK_SIZE];
|
||||||
latency_logger_t *logger;
|
latency_logger_t *logger;
|
||||||
char node_role[OMNI_MAX_NODE_ROLE];
|
char node_role[OMNI_MAX_NODE_ROLE];
|
||||||
@@ -307,6 +309,26 @@ static uint64_t kcp_counter_diff(uint64_t previous, uint64_t current) {
|
|||||||
return current < previous ? 0 : current - previous;
|
return current < previous ? 0 : current - previous;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void kcp_conn_update_min_srtt_locked(kcp_conn_t *conn) {
|
||||||
|
int32_t srtt_ms;
|
||||||
|
|
||||||
|
if (conn == NULL || conn->kcp == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
srtt_ms = conn->kcp->rx_srtt;
|
||||||
|
if (srtt_ms > 0 && (conn->min_srtt_ms <= 0 || srtt_ms < conn->min_srtt_ms)) {
|
||||||
|
conn->min_srtt_ms = srtt_ms;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void kcp_conn_note_feedback_locked(kcp_conn_t *conn) {
|
||||||
|
if (conn == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
conn->last_feedback_ms = omni_now_millis32();
|
||||||
|
kcp_conn_update_min_srtt_locked(conn);
|
||||||
|
}
|
||||||
|
|
||||||
static int kcp_process_sampler_matches(const kcp_process_sampler_t *sampler, kcp_session_stats_logger_t *logger, const char *node_role, const char *node_id, int stats_interval_ms) {
|
static int kcp_process_sampler_matches(const kcp_process_sampler_t *sampler, kcp_session_stats_logger_t *logger, const char *node_role, const char *node_id, int stats_interval_ms) {
|
||||||
if (sampler == NULL) {
|
if (sampler == NULL) {
|
||||||
return 0;
|
return 0;
|
||||||
@@ -729,20 +751,6 @@ static void kcp_process_sampler_release(kcp_process_sampler_t *sampler) {
|
|||||||
free(sampler);
|
free(sampler);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void kcp_process_sampler_request_sample(kcp_process_sampler_t *sampler, const char *reason) {
|
|
||||||
if (sampler == NULL) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
pthread_mutex_lock(&sampler->lock);
|
|
||||||
if (!sampler->stopped && !sampler->request_pending) {
|
|
||||||
sampler->request_pending = 1;
|
|
||||||
sampler->pending_request_id++;
|
|
||||||
snprintf(sampler->pending_reason, sizeof(sampler->pending_reason), "%s", reason == NULL ? "" : reason);
|
|
||||||
pthread_cond_broadcast(&sampler->cond);
|
|
||||||
}
|
|
||||||
pthread_mutex_unlock(&sampler->lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void kcp_process_sampler_request_sample_and_wait(kcp_process_sampler_t *sampler, const char *reason) {
|
static void kcp_process_sampler_request_sample_and_wait(kcp_process_sampler_t *sampler, const char *reason) {
|
||||||
uint64_t request_id;
|
uint64_t request_id;
|
||||||
|
|
||||||
@@ -1107,8 +1115,13 @@ static void kcp_log_session_snapshot(kcp_conn_t *conn, const char *reason) {
|
|||||||
record.rto_ms = conn->kcp->rx_rto;
|
record.rto_ms = conn->kcp->rx_rto;
|
||||||
record.has_srtt_ms = 1;
|
record.has_srtt_ms = 1;
|
||||||
record.srtt_ms = conn->kcp->rx_srtt;
|
record.srtt_ms = conn->kcp->rx_srtt;
|
||||||
|
kcp_conn_update_min_srtt_locked(conn);
|
||||||
|
record.has_min_srtt_ms = conn->min_srtt_ms > 0;
|
||||||
|
record.min_srtt_ms = conn->min_srtt_ms;
|
||||||
record.has_srttvar_ms = 1;
|
record.has_srttvar_ms = 1;
|
||||||
record.srttvar_ms = conn->kcp->rx_rttval;
|
record.srttvar_ms = conn->kcp->rx_rttval;
|
||||||
|
record.has_last_feedback_age_ms = conn->last_feedback_ms != 0;
|
||||||
|
record.last_feedback_age_ms = conn->last_feedback_ms == 0 ? 0 : (omni_now_millis32() - conn->last_feedback_ms);
|
||||||
record.has_snd_wnd = 1;
|
record.has_snd_wnd = 1;
|
||||||
record.snd_wnd = conn->kcp->snd_wnd;
|
record.snd_wnd = conn->kcp->snd_wnd;
|
||||||
record.has_rmt_wnd = 1;
|
record.has_rmt_wnd = 1;
|
||||||
@@ -1282,6 +1295,7 @@ static void *kcp_client_recv_thread_main(void *arg) {
|
|||||||
if (ikcp_input(conn->kcp, (const char *) buffer, n) != 0) {
|
if (ikcp_input(conn->kcp, (const char *) buffer, n) != 0) {
|
||||||
kcp_conn_record_error(conn);
|
kcp_conn_record_error(conn);
|
||||||
} else {
|
} else {
|
||||||
|
kcp_conn_note_feedback_locked(conn);
|
||||||
kcp_conn_record_input(conn, (int) n, segment_count);
|
kcp_conn_record_input(conn, (int) n, segment_count);
|
||||||
}
|
}
|
||||||
pthread_mutex_unlock(&conn->kcp_mu);
|
pthread_mutex_unlock(&conn->kcp_mu);
|
||||||
@@ -1644,6 +1658,7 @@ static void *kcp_listener_recv_thread_main(void *arg) {
|
|||||||
if (ikcp_input(conn->kcp, (const char *) buffer, n) != 0) {
|
if (ikcp_input(conn->kcp, (const char *) buffer, n) != 0) {
|
||||||
kcp_conn_record_error(conn);
|
kcp_conn_record_error(conn);
|
||||||
} else {
|
} else {
|
||||||
|
kcp_conn_note_feedback_locked(conn);
|
||||||
kcp_conn_record_input(conn, (int) n, segment_count);
|
kcp_conn_record_input(conn, (int) n, segment_count);
|
||||||
}
|
}
|
||||||
pthread_mutex_unlock(&conn->kcp_mu);
|
pthread_mutex_unlock(&conn->kcp_mu);
|
||||||
@@ -1771,8 +1786,6 @@ int kcp_conn_send(kcp_conn_t *conn, const message_t *msg) {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
latencylog_log_message_event(conn->logger, conn->node_role, conn->node_id, EVENT_SEND_HANDOFF_BEGIN, msg);
|
latencylog_log_message_event(conn->logger, conn->node_role, conn->node_id, EVENT_SEND_HANDOFF_BEGIN, msg);
|
||||||
kcp_log_session_snapshot(conn, "send_handoff_begin");
|
|
||||||
kcp_process_sampler_request_sample(conn->process_sampler, "send_handoff_begin");
|
|
||||||
pthread_mutex_lock(&conn->kcp_mu);
|
pthread_mutex_lock(&conn->kcp_mu);
|
||||||
atomic_store(&conn->sock_state->last_send_errno, 0);
|
atomic_store(&conn->sock_state->last_send_errno, 0);
|
||||||
conn->kcp->current = omni_now_millis32();
|
conn->kcp->current = omni_now_millis32();
|
||||||
@@ -1791,8 +1804,6 @@ int kcp_conn_send(kcp_conn_t *conn, const message_t *msg) {
|
|||||||
free(frame);
|
free(frame);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
kcp_log_session_snapshot(conn, "send_handoff_end");
|
|
||||||
kcp_process_sampler_request_sample(conn->process_sampler, "send_handoff_end");
|
|
||||||
latencylog_log_message_event(conn->logger, conn->node_role, conn->node_id, EVENT_SEND_HANDOFF_END, msg);
|
latencylog_log_message_event(conn->logger, conn->node_role, conn->node_id, EVENT_SEND_HANDOFF_END, msg);
|
||||||
free(frame);
|
free(frame);
|
||||||
return 0;
|
return 0;
|
||||||
@@ -1835,8 +1846,6 @@ int kcp_conn_receive_timed(kcp_conn_t *conn, message_t *out_msg, int timeout_ms)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
free(frame);
|
free(frame);
|
||||||
kcp_log_session_snapshot(conn, "receive");
|
|
||||||
kcp_process_sampler_request_sample(conn->process_sampler, "receive");
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
pthread_mutex_lock(&conn->kcp_mu);
|
pthread_mutex_lock(&conn->kcp_mu);
|
||||||
@@ -1927,7 +1936,10 @@ void kcp_conn_runtime_stats_snapshot(kcp_conn_t *conn, kcp_runtime_stats_t *out_
|
|||||||
out_stats->conv = conn->kcp->conv;
|
out_stats->conv = conn->kcp->conv;
|
||||||
out_stats->rto_ms = conn->kcp->rx_rto;
|
out_stats->rto_ms = conn->kcp->rx_rto;
|
||||||
out_stats->srtt_ms = conn->kcp->rx_srtt;
|
out_stats->srtt_ms = conn->kcp->rx_srtt;
|
||||||
|
kcp_conn_update_min_srtt_locked(conn);
|
||||||
|
out_stats->min_srtt_ms = conn->min_srtt_ms;
|
||||||
out_stats->srttvar_ms = conn->kcp->rx_rttval;
|
out_stats->srttvar_ms = conn->kcp->rx_rttval;
|
||||||
|
out_stats->last_feedback_age_ms = conn->last_feedback_ms == 0 ? 0 : (omni_now_millis32() - conn->last_feedback_ms);
|
||||||
out_stats->snd_wnd = conn->kcp->snd_wnd;
|
out_stats->snd_wnd = conn->kcp->snd_wnd;
|
||||||
out_stats->rmt_wnd = conn->kcp->rmt_wnd;
|
out_stats->rmt_wnd = conn->kcp->rmt_wnd;
|
||||||
out_stats->inflight = conn->kcp->snd_nxt - conn->kcp->snd_una;
|
out_stats->inflight = conn->kcp->snd_nxt - conn->kcp->snd_una;
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ tx_timestamp_debug_logger_t *tx_timestamp_debug_open_jsonl(const char *path) {
|
|||||||
fclose(file);
|
fclose(file);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
omni_file_logger_init(&logger->file_logger, file);
|
omni_file_logger_init_path(&logger->file_logger, file, path, 0);
|
||||||
logger->enabled = 1;
|
logger->enabled = 1;
|
||||||
return logger;
|
return logger;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,6 +28,13 @@
|
|||||||
#define VIDEO_DEFAULT_CAMERA_DEVICE "/dev/video0"
|
#define VIDEO_DEFAULT_CAMERA_DEVICE "/dev/video0"
|
||||||
#define VIDEO_DEFAULT_PEER_ID "peer-b-video"
|
#define VIDEO_DEFAULT_PEER_ID "peer-b-video"
|
||||||
#define VIDEO_DEFAULT_TARGET_PEER "peer-a-video"
|
#define VIDEO_DEFAULT_TARGET_PEER "peer-a-video"
|
||||||
|
// Default for config->soft_backpressure_segments: the backlog (snd_queue + snd_buffer) at which soft backpressure is reported.
#define VIDEO_SOFT_BACKPRESSURE_SEGMENTS_DEFAULT 64
|
||||||
|
// Default for config->hard_backpressure_segments: the backlog at which hard backpressure is reported.
#define VIDEO_HARD_BACKPRESSURE_SEGMENTS_DEFAULT 192
|
||||||
|
// Default for config->hard_backpressure_hold_ms: how long hard backpressure must persist before the run loop requests a retry.
#define VIDEO_HARD_BACKPRESSURE_HOLD_MS_DEFAULT 1000
|
||||||
|
// Default for config->frame_stall_reconnect_ms: time without a successful send, while soft dropping, after which the pipeline reports a stall.
#define VIDEO_DEFAULT_FRAME_STALL_RECONNECT_MS 3000
|
||||||
|
// window_pressure_pct value at or above which soft backpressure is reported regardless of backlog.
#define VIDEO_SOFT_BACKPRESSURE_WINDOW_PRESSURE_PCT 90.0
|
||||||
|
// window_pressure_pct value at or above which hard backpressure is reported regardless of backlog.
#define VIDEO_HARD_BACKPRESSURE_WINDOW_PRESSURE_PCT 98.0
|
||||||
|
// Minimum time in milliseconds between session polls (message drain + stale-session check) in the run loop.
#define VIDEO_SESSION_POLL_INTERVAL_MS 250
|
||||||
|
|
||||||
typedef struct video_buffer {
|
typedef struct video_buffer {
|
||||||
void *start;
|
void *start;
|
||||||
@@ -39,6 +46,7 @@ typedef struct video_sender {
|
|||||||
char target_peer[OMNI_MAX_PEER_ID];
|
char target_peer[OMNI_MAX_PEER_ID];
|
||||||
uint8_t *send_buffer;
|
uint8_t *send_buffer;
|
||||||
size_t send_buffer_cap;
|
size_t send_buffer_cap;
|
||||||
|
uint64_t next_frame_seq;
|
||||||
} video_sender_t;
|
} video_sender_t;
|
||||||
|
|
||||||
static int video_pipeline_stop_requested(volatile sig_atomic_t *stop_requested) {
|
static int video_pipeline_stop_requested(volatile sig_atomic_t *stop_requested) {
|
||||||
@@ -137,6 +145,20 @@ static const char *env_first_nonempty(const char *first, const char *second, con
|
|||||||
return fallback;
|
return fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reads a strictly positive integer from the environment variable `name`.
//
// Returns `fallback` when the variable is unset or empty, when it does not
// start with a number, when the number is zero or negative, or when it does
// not fit in an int. Leading whitespace is skipped and any text following the
// leading number is ignored (so "250ms" yields 250), matching the behavior of
// the atoi()-based parser this replaces. Unlike atoi(), out-of-range input is
// detected instead of invoking undefined behavior. errno is left unchanged.
static int env_int_or_default(const char *name, int fallback) {
    const char *value = getenv(name);
    // Largest value representable in int, derived without needing <limits.h>.
    const long int_max = (long) (~0U >> 1);
    char *end = NULL;
    long parsed;
    int saved_errno;
    int out_of_range;

    if (value == NULL || value[0] == '\0') {
        return fallback;
    }

    // strtol() reports overflow only through errno, so clear it first and
    // restore the caller's value afterwards.
    saved_errno = errno;
    errno = 0;
    parsed = strtol(value, &end, 10);
    out_of_range = (errno == ERANGE);
    errno = saved_errno;

    if (end == value || out_of_range) {
        return fallback;
    }
    if (parsed <= 0 || parsed > int_max) {
        return fallback;
    }
    return (int) parsed;
}
|
||||||
|
|
||||||
static void video_pipeline_set_error(video_pipeline_stats_t *stats, const char *message) {
|
static void video_pipeline_set_error(video_pipeline_stats_t *stats, const char *message) {
|
||||||
if (stats == NULL) {
|
if (stats == NULL) {
|
||||||
return;
|
return;
|
||||||
@@ -161,6 +183,13 @@ static void video_pipeline_set_errno_error(video_pipeline_stats_t *stats, const
|
|||||||
video_pipeline_set_error(stats, buffer);
|
video_pipeline_set_error(stats, buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void video_pipeline_report_progress(const video_pipeline_config_t *config) {
|
||||||
|
if (config == NULL || config->progress_callback == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
config->progress_callback(config->progress_context);
|
||||||
|
}
|
||||||
|
|
||||||
void video_pipeline_config_init(video_pipeline_config_t *config) {
|
void video_pipeline_config_init(video_pipeline_config_t *config) {
|
||||||
if (config == NULL) {
|
if (config == NULL) {
|
||||||
return;
|
return;
|
||||||
@@ -179,6 +208,13 @@ void video_pipeline_config_init(video_pipeline_config_t *config) {
|
|||||||
config->output_height = VIDEO_OUTPUT_HEIGHT_DEFAULT;
|
config->output_height = VIDEO_OUTPUT_HEIGHT_DEFAULT;
|
||||||
config->max_frames = 0;
|
config->max_frames = 0;
|
||||||
config->enable_timing_logs = 0;
|
config->enable_timing_logs = 0;
|
||||||
|
config->soft_backpressure_segments = VIDEO_SOFT_BACKPRESSURE_SEGMENTS_DEFAULT;
|
||||||
|
config->hard_backpressure_segments = VIDEO_HARD_BACKPRESSURE_SEGMENTS_DEFAULT;
|
||||||
|
config->hard_backpressure_hold_ms = VIDEO_HARD_BACKPRESSURE_HOLD_MS_DEFAULT;
|
||||||
|
config->frame_stall_reconnect_ms = VIDEO_DEFAULT_FRAME_STALL_RECONNECT_MS;
|
||||||
|
config->stats_logger = NULL;
|
||||||
|
config->stage_logger = NULL;
|
||||||
|
config->stats_interval_ms = 1000;
|
||||||
}
|
}
|
||||||
|
|
||||||
void video_pipeline_config_load_env(video_pipeline_config_t *config) {
|
void video_pipeline_config_load_env(video_pipeline_config_t *config) {
|
||||||
@@ -196,6 +232,11 @@ void video_pipeline_config_load_env(video_pipeline_config_t *config) {
|
|||||||
config->max_frames = atoi(getenv("OMNI_VIDEO_MAX_FRAMES"));
|
config->max_frames = atoi(getenv("OMNI_VIDEO_MAX_FRAMES"));
|
||||||
}
|
}
|
||||||
config->enable_timing_logs = env_flag_or_default("OMNI_VIDEO_DEBUG_TIMING", config->enable_timing_logs);
|
config->enable_timing_logs = env_flag_or_default("OMNI_VIDEO_DEBUG_TIMING", config->enable_timing_logs);
|
||||||
|
config->soft_backpressure_segments = env_int_or_default("OMNI_VIDEO_SOFT_BACKPRESSURE_SEGMENTS", config->soft_backpressure_segments);
|
||||||
|
config->hard_backpressure_segments = env_int_or_default("OMNI_VIDEO_HARD_BACKPRESSURE_SEGMENTS", config->hard_backpressure_segments);
|
||||||
|
config->hard_backpressure_hold_ms = env_int_or_default("OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS", config->hard_backpressure_hold_ms);
|
||||||
|
config->frame_stall_reconnect_ms = env_int_or_default("OMNI_VIDEO_FRAME_STALL_RECONNECT_MS", config->frame_stall_reconnect_ms);
|
||||||
|
config->stats_interval_ms = env_int_or_default("BLITZ_KCP_STATS_INTERVAL_MS", config->stats_interval_ms);
|
||||||
}
|
}
|
||||||
|
|
||||||
int video_pipeline_stats_init(video_pipeline_stats_t *stats) {
|
int video_pipeline_stats_init(video_pipeline_stats_t *stats) {
|
||||||
@@ -229,9 +270,15 @@ void video_pipeline_stats_snapshot(video_pipeline_stats_t *stats, video_pipeline
|
|||||||
out_stats->frames_sent = stats->frames_sent;
|
out_stats->frames_sent = stats->frames_sent;
|
||||||
out_stats->bytes_sent = stats->bytes_sent;
|
out_stats->bytes_sent = stats->bytes_sent;
|
||||||
out_stats->send_errors = stats->send_errors;
|
out_stats->send_errors = stats->send_errors;
|
||||||
|
out_stats->backpressure_drops = stats->backpressure_drops;
|
||||||
|
out_stats->backlog_resets = stats->backlog_resets;
|
||||||
out_stats->last_frame_bytes = stats->last_frame_bytes;
|
out_stats->last_frame_bytes = stats->last_frame_bytes;
|
||||||
|
out_stats->last_backlog_segments = stats->last_backlog_segments;
|
||||||
|
out_stats->last_capture_to_send_ms = stats->last_capture_to_send_ms;
|
||||||
|
out_stats->avg_capture_to_send_ms = stats->avg_capture_to_send_ms;
|
||||||
out_stats->connected = stats->connected;
|
out_stats->connected = stats->connected;
|
||||||
snprintf(out_stats->last_error, sizeof(out_stats->last_error), "%s", stats->last_error);
|
snprintf(out_stats->last_error, sizeof(out_stats->last_error), "%s", stats->last_error);
|
||||||
|
snprintf(out_stats->last_backlog_reason, sizeof(out_stats->last_backlog_reason), "%s", stats->last_backlog_reason);
|
||||||
out_stats->transport = stats->transport;
|
out_stats->transport = stats->transport;
|
||||||
pthread_mutex_unlock(&stats->mutex);
|
pthread_mutex_unlock(&stats->mutex);
|
||||||
}
|
}
|
||||||
@@ -557,8 +604,8 @@ static int video_sender_init(video_sender_t *sender, const video_pipeline_config
|
|||||||
&options,
|
&options,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
config->stats_logger,
|
||||||
KCP_DEFAULT_STATS_INTERVAL_MS
|
config->stats_interval_ms
|
||||||
);
|
);
|
||||||
if (sender->client == NULL) {
|
if (sender->client == NULL) {
|
||||||
return -1;
|
return -1;
|
||||||
@@ -566,25 +613,80 @@ static int video_sender_init(video_sender_t *sender, const video_pipeline_config
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int video_sender_send_packet(video_sender_t *sender, const AVPacket *encoded_pkt, uint64_t timestamp) {
|
static int video_sender_drain_pending_messages(video_sender_t *sender) {
|
||||||
uint8_t *payload;
|
int drained = 0;
|
||||||
size_t payload_len;
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
if (sender == NULL || sender->client == NULL || encoded_pkt == NULL) {
|
if (sender == NULL || sender->client == NULL) {
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
payload_len = (size_t) encoded_pkt->size + sizeof(timestamp);
|
for (;;) {
|
||||||
|
message_t msg;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
protocol_message_init(&msg);
|
||||||
|
rc = kcp_client_receive_timed(sender->client, &msg, 1);
|
||||||
|
if (rc == 1) {
|
||||||
|
protocol_message_clear(&msg);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (rc != 0) {
|
||||||
|
protocol_message_clear(&msg);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drain unread server errors so an offline receiver cannot back up the reverse KCP stream.
|
||||||
|
protocol_message_clear(&msg);
|
||||||
|
drained += 1;
|
||||||
|
if (drained >= 8) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int video_sender_send_packet(
|
||||||
|
video_sender_t *sender,
|
||||||
|
const AVPacket *encoded_pkt,
|
||||||
|
const video_pipeline_packet_metadata_t *metadata,
|
||||||
|
uint64_t *out_frame_seq
|
||||||
|
) {
|
||||||
|
uint8_t *payload;
|
||||||
|
size_t payload_len;
|
||||||
|
uint64_t frame_seq;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
if (sender == NULL || sender->client == NULL || encoded_pkt == NULL || metadata == NULL) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
frame_seq = sender->next_frame_seq + 1U;
|
||||||
|
payload_len = 8U + (size_t) encoded_pkt->size + sizeof(*metadata);
|
||||||
if (video_sender_ensure_buffer_capacity(sender, payload_len) != 0) {
|
if (video_sender_ensure_buffer_capacity(sender, payload_len) != 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
payload = sender->send_buffer;
|
payload = sender->send_buffer;
|
||||||
|
|
||||||
memcpy(payload, encoded_pkt->data, (size_t) encoded_pkt->size);
|
payload[0] = (uint8_t) (frame_seq >> 56);
|
||||||
memcpy(payload + encoded_pkt->size, &timestamp, sizeof(timestamp));
|
payload[1] = (uint8_t) (frame_seq >> 48);
|
||||||
|
payload[2] = (uint8_t) (frame_seq >> 40);
|
||||||
|
payload[3] = (uint8_t) (frame_seq >> 32);
|
||||||
|
payload[4] = (uint8_t) (frame_seq >> 24);
|
||||||
|
payload[5] = (uint8_t) (frame_seq >> 16);
|
||||||
|
payload[6] = (uint8_t) (frame_seq >> 8);
|
||||||
|
payload[7] = (uint8_t) frame_seq;
|
||||||
|
memcpy(payload + 8U, encoded_pkt->data, (size_t) encoded_pkt->size);
|
||||||
|
memcpy(payload + 8U + (size_t) encoded_pkt->size, metadata, sizeof(*metadata));
|
||||||
rc = kcp_client_send_binary(sender->client, sender->target_peer, payload, payload_len);
|
rc = kcp_client_send_binary(sender->client, sender->target_peer, payload, payload_len);
|
||||||
|
if (rc != 0) {
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
sender->next_frame_seq = frame_seq;
|
||||||
|
if (out_frame_seq != NULL) {
|
||||||
|
*out_frame_seq = frame_seq;
|
||||||
|
}
|
||||||
|
rc = video_sender_drain_pending_messages(sender);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -602,6 +704,231 @@ static void video_sender_close(video_sender_t *sender) {
|
|||||||
sender->send_buffer_cap = 0;
|
sender->send_buffer_cap = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t video_sender_backlog_segments(const kcp_runtime_stats_t *stats) {
|
||||||
|
if (stats == NULL) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return stats->snd_queue + stats->snd_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int video_sender_soft_backpressure_active(const video_pipeline_config_t *config, const kcp_runtime_stats_t *transport) {
|
||||||
|
if (config == NULL || transport == NULL) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return video_sender_backlog_segments(transport) >= (uint32_t) config->soft_backpressure_segments
|
||||||
|
|| transport->window_pressure_pct >= VIDEO_SOFT_BACKPRESSURE_WINDOW_PRESSURE_PCT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int video_sender_hard_backpressure_active(const video_pipeline_config_t *config, const kcp_runtime_stats_t *transport) {
|
||||||
|
if (config == NULL || transport == NULL) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return video_sender_backlog_segments(transport) >= (uint32_t) config->hard_backpressure_segments
|
||||||
|
|| transport->window_pressure_pct >= VIDEO_HARD_BACKPRESSURE_WINDOW_PRESSURE_PCT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void video_pipeline_note_backpressure(
|
||||||
|
video_pipeline_stats_t *stats,
|
||||||
|
const char *reason,
|
||||||
|
const kcp_runtime_stats_t *transport,
|
||||||
|
int increment_drop,
|
||||||
|
int increment_reset
|
||||||
|
) {
|
||||||
|
if (stats == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pthread_mutex_lock(&stats->mutex);
|
||||||
|
if (increment_drop) {
|
||||||
|
stats->backpressure_drops += 1;
|
||||||
|
}
|
||||||
|
if (increment_reset) {
|
||||||
|
stats->backlog_resets += 1;
|
||||||
|
}
|
||||||
|
if (transport != NULL) {
|
||||||
|
stats->last_backlog_segments = video_sender_backlog_segments(transport);
|
||||||
|
stats->transport = *transport;
|
||||||
|
} else {
|
||||||
|
stats->last_backlog_segments = 0;
|
||||||
|
}
|
||||||
|
snprintf(stats->last_backlog_reason, sizeof(stats->last_backlog_reason), "%s", reason == NULL ? "" : reason);
|
||||||
|
pthread_mutex_unlock(&stats->mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores the latest capture-to-send latency and folds it into the running
// average, under stats->mutex. While the average is still <= 0.0 it is seeded
// with the sample; afterwards it is blended as 90% previous average plus 10%
// new sample. No-op when stats is NULL.
static void video_pipeline_note_capture_to_send(video_pipeline_stats_t *stats, uint32_t capture_to_send_ms) {
    if (stats == NULL) {
        return;
    }

    pthread_mutex_lock(&stats->mutex);
    stats->last_capture_to_send_ms = capture_to_send_ms;
    stats->avg_capture_to_send_ms = (stats->avg_capture_to_send_ms <= 0.0)
        ? (double) capture_to_send_ms
        : stats->avg_capture_to_send_ms * 0.9 + (double) capture_to_send_ms * 0.1;
    pthread_mutex_unlock(&stats->mutex);
}
|
||||||
|
|
||||||
|
static int video_stage_logger_should_log(const video_stage_logger_t *logger, uint64_t frame_seq) {
|
||||||
|
if (logger == NULL || !logger->enabled) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (logger->sample_mod <= 1U) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return frame_seq % logger->sample_mod == 0U;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void video_stage_logger_log_frame(
|
||||||
|
video_stage_logger_t *logger,
|
||||||
|
uint64_t frame_seq,
|
||||||
|
double capture_ms,
|
||||||
|
double decode_ms,
|
||||||
|
double scale_ms,
|
||||||
|
double encode_ms,
|
||||||
|
double send_ms,
|
||||||
|
double pipeline_total_ms,
|
||||||
|
size_t jpeg_bytes,
|
||||||
|
uint64_t kcp_out_seg_delta,
|
||||||
|
uint32_t backlog_segments,
|
||||||
|
double window_pressure_pct,
|
||||||
|
int32_t video_srtt_ms
|
||||||
|
) {
|
||||||
|
char *line;
|
||||||
|
|
||||||
|
if (!video_stage_logger_should_log(logger, frame_seq)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
line = omni_strdup_printf(
|
||||||
|
"{\"ts_unix_nano\":%" PRId64 ",\"frame_seq\":%" PRIu64 ",\"capture_ms\":%.3f,\"decode_ms\":%.3f,\"scale_ms\":%.3f,\"encode_ms\":%.3f,\"send_ms\":%.3f,\"pipeline_total_ms\":%.3f,\"jpeg_bytes\":%zu,\"kcp_out_seg_delta\":%" PRIu64 ",\"backlog_segments\":%u,\"window_pressure_pct\":%.3f,\"video_srtt_ms\":%d}",
|
||||||
|
omni_now_unix_nano(),
|
||||||
|
frame_seq,
|
||||||
|
capture_ms,
|
||||||
|
decode_ms,
|
||||||
|
scale_ms,
|
||||||
|
encode_ms,
|
||||||
|
send_ms,
|
||||||
|
pipeline_total_ms,
|
||||||
|
jpeg_bytes,
|
||||||
|
kcp_out_seg_delta,
|
||||||
|
backlog_segments,
|
||||||
|
window_pressure_pct,
|
||||||
|
video_srtt_ms
|
||||||
|
);
|
||||||
|
if (line == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
(void) omni_file_logger_write_line(&logger->file_logger, line);
|
||||||
|
free(line);
|
||||||
|
}
|
||||||
|
|
||||||
|
video_stage_logger_t *video_stage_logger_open_jsonl(const char *path, uint64_t sample_mod) {
|
||||||
|
video_stage_logger_t *logger;
|
||||||
|
FILE *file;
|
||||||
|
|
||||||
|
if (path == NULL || path[0] == '\0') {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (omni_ensure_parent_dir(path) != 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
file = fopen(path, "ab");
|
||||||
|
if (file == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
logger = (video_stage_logger_t *) calloc(1, sizeof(*logger));
|
||||||
|
if (logger == NULL) {
|
||||||
|
fclose(file);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
omni_file_logger_init_path(&logger->file_logger, file, path, 0);
|
||||||
|
logger->enabled = 1;
|
||||||
|
logger->sample_mod = sample_mod == 0U ? 1U : sample_mod;
|
||||||
|
return logger;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Closes the log file (if one is open), destroys the embedded file logger and
// frees the logger itself. Passing NULL is a no-op.
void video_stage_logger_close(video_stage_logger_t *logger) {
    if (logger != NULL) {
        if (logger->file_logger.file != NULL) {
            fclose(logger->file_logger.file);
        }
        omni_file_logger_destroy(&logger->file_logger);
        free(logger);
    }
}
|
||||||
|
|
||||||
|
// Returns 1 when a server error message contains one of the phrases that
// mark the session as unusable, and 0 otherwise (including for a NULL or
// empty message). Matching is a plain case-sensitive substring search.
static int video_server_error_requires_reconnect(const char *message) {
    static const char *const fatal_phrases[] = {
        "not registered",
        "first message must be register",
        "peer replaced",
        "timed out waiting for server_register_ok",
        "failed to acknowledge server heartbeat",
    };
    size_t idx;

    if (message == NULL || message[0] == '\0') {
        return 0;
    }

    for (idx = 0; idx < sizeof(fatal_phrases) / sizeof(fatal_phrases[0]); idx++) {
        if (strstr(message, fatal_phrases[idx]) != NULL) {
            return 1;
        }
    }
    return 0;
}
|
||||||
|
|
||||||
|
static void video_pipeline_update_connection_state(
|
||||||
|
video_pipeline_stats_t *stats,
|
||||||
|
const kcp_client_state_t *client_state,
|
||||||
|
const kcp_runtime_stats_t *transport
|
||||||
|
) {
|
||||||
|
if (stats == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_lock(&stats->mutex);
|
||||||
|
if (transport != NULL) {
|
||||||
|
stats->transport = *transport;
|
||||||
|
}
|
||||||
|
if (client_state != NULL) {
|
||||||
|
stats->connected = client_state->connected != 0 && client_state->registered != 0;
|
||||||
|
if (client_state->last_server_error[0] != '\0') {
|
||||||
|
snprintf(stats->last_error, sizeof(stats->last_error), "%s", client_state->last_server_error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(&stats->mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int video_sender_check_session_stale(
|
||||||
|
video_sender_t *sender,
|
||||||
|
const video_pipeline_config_t *config,
|
||||||
|
video_pipeline_stats_t *stats,
|
||||||
|
kcp_runtime_stats_t *transport_stats,
|
||||||
|
char *reason,
|
||||||
|
size_t reason_len
|
||||||
|
) {
|
||||||
|
kcp_client_state_t client_state;
|
||||||
|
|
||||||
|
if (
|
||||||
|
sender == NULL || sender->client == NULL || config == NULL || stats == NULL || transport_stats == NULL
|
||||||
|
|| reason == NULL || reason_len == 0
|
||||||
|
) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
reason[0] = '\0';
|
||||||
|
memset(&client_state, 0, sizeof(client_state));
|
||||||
|
kcp_client_runtime_stats_snapshot(sender->client, transport_stats);
|
||||||
|
kcp_client_state_snapshot(sender->client, &client_state);
|
||||||
|
video_pipeline_update_connection_state(stats, &client_state, transport_stats);
|
||||||
|
|
||||||
|
if (!transport_stats->connected || !client_state.connected) {
|
||||||
|
snprintf(reason, reason_len, "video session stale: transport disconnected");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (!client_state.registered) {
|
||||||
|
snprintf(reason, reason_len, "video session stale: server reported unregistered");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (video_server_error_requires_reconnect(client_state.last_server_error)) {
|
||||||
|
snprintf(reason, reason_len, "video session stale: server error %.180s", client_state.last_server_error);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void video_pipeline_cleanup_buffers(video_buffer_t *buffers, int num_buffers) {
|
static void video_pipeline_cleanup_buffers(video_buffer_t *buffers, int num_buffers) {
|
||||||
int i;
|
int i;
|
||||||
if (buffers == NULL) {
|
if (buffers == NULL) {
|
||||||
@@ -630,6 +957,14 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
int sws_src_width = 0;
|
int sws_src_width = 0;
|
||||||
int sws_src_height = 0;
|
int sws_src_height = 0;
|
||||||
int sws_src_format = -1;
|
int sws_src_format = -1;
|
||||||
|
uint32_t hard_backpressure_since_ms = 0;
|
||||||
|
uint32_t last_soft_drop_log_ms = 0;
|
||||||
|
uint32_t last_session_poll_ms = 0;
|
||||||
|
uint32_t last_successful_send_ms = 0;
|
||||||
|
uint64_t soft_drops_since_last_send = 0;
|
||||||
|
int have_sent_frame = 0;
|
||||||
|
const char *gpsd_host = env_or_default("OMNI_GPSD_HOST", "127.0.0.1");
|
||||||
|
int gps_buffer_started = 0;
|
||||||
|
|
||||||
memset(&sender, 0, sizeof(sender));
|
memset(&sender, 0, sizeof(sender));
|
||||||
if (stats == NULL) {
|
if (stats == NULL) {
|
||||||
@@ -677,6 +1012,11 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
video_pipeline_set_errno_error(stats, "failed to start video sender");
|
video_pipeline_set_errno_error(stats, "failed to start video sender");
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
if (gps_buffer_init(gpsd_host) != 0) {
|
||||||
|
fprintf(stderr, "[video_pipeline] failed to start GPS buffer using %s:2947\n", gpsd_host);
|
||||||
|
} else {
|
||||||
|
gps_buffer_started = 1;
|
||||||
|
}
|
||||||
|
|
||||||
pthread_mutex_lock(&stats->mutex);
|
pthread_mutex_lock(&stats->mutex);
|
||||||
stats->connected = 1;
|
stats->connected = 1;
|
||||||
@@ -713,7 +1053,10 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
AVFrame *decoded_frame = NULL;
|
AVFrame *decoded_frame = NULL;
|
||||||
AVFrame *scaled_frame = NULL;
|
AVFrame *scaled_frame = NULL;
|
||||||
AVPacket *encoded_pkt = NULL;
|
AVPacket *encoded_pkt = NULL;
|
||||||
|
kcp_runtime_stats_t transport_stats;
|
||||||
|
kcp_runtime_stats_t transport_after_send;
|
||||||
int select_rc;
|
int select_rc;
|
||||||
|
int should_log_stage = 0;
|
||||||
double total_start_ms = 0.0;
|
double total_start_ms = 0.0;
|
||||||
double capture_start_ms = 0.0;
|
double capture_start_ms = 0.0;
|
||||||
double capture_end_ms = 0.0;
|
double capture_end_ms = 0.0;
|
||||||
@@ -725,14 +1068,24 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
double encode_end_ms = 0.0;
|
double encode_end_ms = 0.0;
|
||||||
double send_start_ms = 0.0;
|
double send_start_ms = 0.0;
|
||||||
double send_end_ms = 0.0;
|
double send_end_ms = 0.0;
|
||||||
|
video_pipeline_packet_metadata_t packet_metadata;
|
||||||
|
char reconnect_reason[256];
|
||||||
int frame_number = frame_index + 1;
|
int frame_number = frame_index + 1;
|
||||||
|
uint64_t frame_seq = 0;
|
||||||
|
uint64_t out_segs_before_send = 0;
|
||||||
|
uint64_t out_segs_after_send = 0;
|
||||||
|
uint32_t capture_to_send_ms = 0;
|
||||||
|
|
||||||
|
memset(&transport_stats, 0, sizeof(transport_stats));
|
||||||
|
memset(&transport_after_send, 0, sizeof(transport_after_send));
|
||||||
|
memset(&packet_metadata, 0, sizeof(packet_metadata));
|
||||||
|
reconnect_reason[0] = '\0';
|
||||||
|
video_pipeline_report_progress(config);
|
||||||
|
|
||||||
if (config->max_frames > 0 && frame_index >= config->max_frames) {
|
if (config->max_frames > 0 && frame_index >= config->max_frames) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (config->enable_timing_logs) {
|
total_start_ms = video_pipeline_now_ms();
|
||||||
total_start_ms = video_pipeline_now_ms();
|
|
||||||
}
|
|
||||||
|
|
||||||
FD_ZERO(&fds);
|
FD_ZERO(&fds);
|
||||||
FD_SET(fd, &fds);
|
FD_SET(fd, &fds);
|
||||||
@@ -746,9 +1099,7 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
video_pipeline_set_errno_error(stats, "failed waiting for camera frame");
|
video_pipeline_set_errno_error(stats, "failed waiting for camera frame");
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
if (config->enable_timing_logs) {
|
capture_start_ms = video_pipeline_now_ms();
|
||||||
capture_start_ms = video_pipeline_now_ms();
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(&buf, 0, sizeof(buf));
|
memset(&buf, 0, sizeof(buf));
|
||||||
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||||
@@ -757,10 +1108,8 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
video_pipeline_set_errno_error(stats, "failed to dequeue V4L2 buffer");
|
video_pipeline_set_errno_error(stats, "failed to dequeue V4L2 buffer");
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
if (config->enable_timing_logs) {
|
capture_end_ms = video_pipeline_now_ms();
|
||||||
capture_end_ms = video_pipeline_now_ms();
|
decode_start_ms = capture_end_ms;
|
||||||
decode_start_ms = capture_end_ms;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (decode_mjpeg_frame(decoder, (const uint8_t *) buffers[buf.index].start, (int) buf.bytesused, &decoded_frame) != 0) {
|
if (decode_mjpeg_frame(decoder, (const uint8_t *) buffers[buf.index].start, (int) buf.bytesused, &decoded_frame) != 0) {
|
||||||
if (config->enable_timing_logs) {
|
if (config->enable_timing_logs) {
|
||||||
@@ -769,10 +1118,8 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (config->enable_timing_logs) {
|
decode_end_ms = video_pipeline_now_ms();
|
||||||
decode_end_ms = video_pipeline_now_ms();
|
scale_start_ms = decode_end_ms;
|
||||||
scale_start_ms = decode_end_ms;
|
|
||||||
}
|
|
||||||
if (
|
if (
|
||||||
ensure_scale_context(
|
ensure_scale_context(
|
||||||
&sws_ctx,
|
&sws_ctx,
|
||||||
@@ -792,10 +1139,8 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (config->enable_timing_logs) {
|
scale_end_ms = video_pipeline_now_ms();
|
||||||
scale_end_ms = video_pipeline_now_ms();
|
encode_start_ms = scale_end_ms;
|
||||||
encode_start_ms = scale_end_ms;
|
|
||||||
}
|
|
||||||
if (encode_frame(encoder, scaled_frame, &encoded_pkt) != 0) {
|
if (encode_frame(encoder, scaled_frame, &encoded_pkt) != 0) {
|
||||||
if (config->enable_timing_logs) {
|
if (config->enable_timing_logs) {
|
||||||
video_pipeline_print_timing_failure(frame_number, "encode");
|
video_pipeline_print_timing_failure(frame_number, "encode");
|
||||||
@@ -805,12 +1150,166 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (config->enable_timing_logs) {
|
encode_end_ms = video_pipeline_now_ms();
|
||||||
encode_end_ms = video_pipeline_now_ms();
|
send_start_ms = encode_end_ms;
|
||||||
send_start_ms = encode_end_ms;
|
|
||||||
|
{
|
||||||
|
gps_video_sample_t gps_sample = get_latest_gps_for_video();
|
||||||
|
|
||||||
|
packet_metadata.timestamp_ms = (uint64_t) get_realtime_ms();
|
||||||
|
packet_metadata.latitude = gps_sample.latitude;
|
||||||
|
packet_metadata.longitude = gps_sample.longitude;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (video_sender_send_packet(&sender, encoded_pkt, (uint64_t) get_realtime_ms()) != 0) {
|
if (
|
||||||
|
last_session_poll_ms == 0
|
||||||
|
|| omni_now_millis32() - last_session_poll_ms >= VIDEO_SESSION_POLL_INTERVAL_MS
|
||||||
|
) {
|
||||||
|
if (video_sender_drain_pending_messages(&sender) != 0) {
|
||||||
|
video_pipeline_set_errno_error(stats, "failed to poll video session");
|
||||||
|
av_frame_free(&decoded_frame);
|
||||||
|
av_frame_free(&scaled_frame);
|
||||||
|
av_packet_free(&encoded_pkt);
|
||||||
|
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
||||||
|
rc = VIDEO_PIPELINE_RUN_RETRY_IMMEDIATE;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
video_sender_check_session_stale(
|
||||||
|
&sender,
|
||||||
|
config,
|
||||||
|
stats,
|
||||||
|
&transport_stats,
|
||||||
|
reconnect_reason,
|
||||||
|
sizeof(reconnect_reason)
|
||||||
|
) != 0
|
||||||
|
) {
|
||||||
|
if (reconnect_reason[0] == '\0') {
|
||||||
|
snprintf(reconnect_reason, sizeof(reconnect_reason), "video session stale: poll failed");
|
||||||
|
}
|
||||||
|
video_pipeline_set_error(stats, reconnect_reason);
|
||||||
|
fprintf(stderr, "[video_pipeline] %s\n", reconnect_reason);
|
||||||
|
av_frame_free(&decoded_frame);
|
||||||
|
av_frame_free(&scaled_frame);
|
||||||
|
av_packet_free(&encoded_pkt);
|
||||||
|
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
||||||
|
rc = VIDEO_PIPELINE_RUN_RETRY_IMMEDIATE;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
last_session_poll_ms = omni_now_millis32();
|
||||||
|
} else {
|
||||||
|
kcp_client_runtime_stats_snapshot(sender.client, &transport_stats);
|
||||||
|
}
|
||||||
|
if (video_sender_hard_backpressure_active(config, &transport_stats)) {
|
||||||
|
uint32_t now_ms = omni_now_millis32();
|
||||||
|
|
||||||
|
if (hard_backpressure_since_ms == 0) {
|
||||||
|
hard_backpressure_since_ms = now_ms;
|
||||||
|
}
|
||||||
|
if (now_ms - hard_backpressure_since_ms >= (uint32_t) config->hard_backpressure_hold_ms) {
|
||||||
|
char reason[128];
|
||||||
|
uint32_t backlog_segments = video_sender_backlog_segments(&transport_stats);
|
||||||
|
|
||||||
|
snprintf(
|
||||||
|
reason,
|
||||||
|
sizeof(reason),
|
||||||
|
"hard_reset backlog=%u snd_queue=%u snd_buffer=%u window_pressure=%.1f%% hold_ms=%d",
|
||||||
|
backlog_segments,
|
||||||
|
transport_stats.snd_queue,
|
||||||
|
transport_stats.snd_buffer,
|
||||||
|
transport_stats.window_pressure_pct,
|
||||||
|
config->hard_backpressure_hold_ms
|
||||||
|
);
|
||||||
|
video_pipeline_note_backpressure(stats, reason, &transport_stats, 0, 1);
|
||||||
|
video_pipeline_set_error(stats, reason);
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"[video_pipeline] backlog hard reset: backlog=%u snd_queue=%u snd_buffer=%u window_pressure=%.1f%% hold_ms=%d\n",
|
||||||
|
backlog_segments,
|
||||||
|
transport_stats.snd_queue,
|
||||||
|
transport_stats.snd_buffer,
|
||||||
|
transport_stats.window_pressure_pct,
|
||||||
|
config->hard_backpressure_hold_ms
|
||||||
|
);
|
||||||
|
av_frame_free(&decoded_frame);
|
||||||
|
av_frame_free(&scaled_frame);
|
||||||
|
av_packet_free(&encoded_pkt);
|
||||||
|
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
||||||
|
rc = VIDEO_PIPELINE_RUN_RETRY_IMMEDIATE;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
hard_backpressure_since_ms = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (video_sender_soft_backpressure_active(config, &transport_stats)) {
|
||||||
|
uint32_t now_ms = omni_now_millis32();
|
||||||
|
uint32_t backlog_segments = video_sender_backlog_segments(&transport_stats);
|
||||||
|
char reason[128];
|
||||||
|
|
||||||
|
snprintf(
|
||||||
|
reason,
|
||||||
|
sizeof(reason),
|
||||||
|
"soft_drop backlog=%u snd_queue=%u snd_buffer=%u window_pressure=%.1f%% threshold=%d",
|
||||||
|
backlog_segments,
|
||||||
|
transport_stats.snd_queue,
|
||||||
|
transport_stats.snd_buffer,
|
||||||
|
transport_stats.window_pressure_pct,
|
||||||
|
config->soft_backpressure_segments
|
||||||
|
);
|
||||||
|
video_pipeline_note_backpressure(stats, reason, &transport_stats, 1, 0);
|
||||||
|
soft_drops_since_last_send += 1;
|
||||||
|
if (now_ms - last_soft_drop_log_ms >= 1000U) {
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"[video_pipeline] soft drop: backlog=%u snd_queue=%u snd_buffer=%u window_pressure=%.1f%% threshold=%d\n",
|
||||||
|
backlog_segments,
|
||||||
|
transport_stats.snd_queue,
|
||||||
|
transport_stats.snd_buffer,
|
||||||
|
transport_stats.window_pressure_pct,
|
||||||
|
config->soft_backpressure_segments
|
||||||
|
);
|
||||||
|
last_soft_drop_log_ms = now_ms;
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
have_sent_frame
|
||||||
|
&& config->frame_stall_reconnect_ms > 0
|
||||||
|
&& now_ms - last_successful_send_ms >= (uint32_t) config->frame_stall_reconnect_ms
|
||||||
|
) {
|
||||||
|
char stall_reason[192];
|
||||||
|
|
||||||
|
snprintf(
|
||||||
|
stall_reason,
|
||||||
|
sizeof(stall_reason),
|
||||||
|
"video pipeline stalled: no frames sent for %u ms while soft dropping (%llu drops, backlog=%u, srtt=%d ms)",
|
||||||
|
now_ms - last_successful_send_ms,
|
||||||
|
(unsigned long long) soft_drops_since_last_send,
|
||||||
|
backlog_segments,
|
||||||
|
transport_stats.srtt_ms
|
||||||
|
);
|
||||||
|
video_pipeline_set_error(stats, stall_reason);
|
||||||
|
fprintf(stderr, "[video_pipeline] %s\n", stall_reason);
|
||||||
|
av_frame_free(&decoded_frame);
|
||||||
|
av_frame_free(&scaled_frame);
|
||||||
|
av_packet_free(&encoded_pkt);
|
||||||
|
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
||||||
|
rc = VIDEO_PIPELINE_RUN_RETRY_IMMEDIATE;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
av_frame_free(&decoded_frame);
|
||||||
|
av_frame_free(&scaled_frame);
|
||||||
|
av_packet_free(&encoded_pkt);
|
||||||
|
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
capture_to_send_ms = send_start_ms <= capture_start_ms
|
||||||
|
? 0U
|
||||||
|
: (uint32_t) (send_start_ms - capture_start_ms + 0.5);
|
||||||
|
packet_metadata.capture_to_send_ms = capture_to_send_ms;
|
||||||
|
out_segs_before_send = transport_stats.out_segs_total;
|
||||||
|
|
||||||
|
if (video_sender_send_packet(&sender, encoded_pkt, &packet_metadata, &frame_seq) != 0) {
|
||||||
pthread_mutex_lock(&stats->mutex);
|
pthread_mutex_lock(&stats->mutex);
|
||||||
stats->send_errors += 1;
|
stats->send_errors += 1;
|
||||||
pthread_mutex_unlock(&stats->mutex);
|
pthread_mutex_unlock(&stats->mutex);
|
||||||
@@ -824,16 +1323,43 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
(void) ioctl(fd, VIDIOC_QBUF, &buf);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
if (config->enable_timing_logs) {
|
send_end_ms = video_pipeline_now_ms();
|
||||||
send_end_ms = video_pipeline_now_ms();
|
should_log_stage = video_stage_logger_should_log(config->stage_logger, frame_seq);
|
||||||
|
if (should_log_stage) {
|
||||||
|
kcp_client_runtime_stats_snapshot(sender.client, &transport_after_send);
|
||||||
|
out_segs_after_send = transport_after_send.out_segs_total;
|
||||||
|
} else {
|
||||||
|
transport_after_send = transport_stats;
|
||||||
|
out_segs_after_send = out_segs_before_send;
|
||||||
}
|
}
|
||||||
|
video_pipeline_note_capture_to_send(stats, capture_to_send_ms);
|
||||||
|
|
||||||
pthread_mutex_lock(&stats->mutex);
|
pthread_mutex_lock(&stats->mutex);
|
||||||
stats->frames_sent += 1;
|
stats->frames_sent += 1;
|
||||||
stats->bytes_sent += (uint64_t) encoded_pkt->size;
|
stats->bytes_sent += (uint64_t) encoded_pkt->size;
|
||||||
stats->last_frame_bytes = (uint64_t) encoded_pkt->size;
|
stats->last_frame_bytes = (uint64_t) encoded_pkt->size;
|
||||||
kcp_client_runtime_stats_snapshot(sender.client, &stats->transport);
|
stats->transport = transport_after_send;
|
||||||
pthread_mutex_unlock(&stats->mutex);
|
pthread_mutex_unlock(&stats->mutex);
|
||||||
|
have_sent_frame = 1;
|
||||||
|
last_successful_send_ms = omni_now_millis32();
|
||||||
|
soft_drops_since_last_send = 0;
|
||||||
|
if (should_log_stage) {
|
||||||
|
video_stage_logger_log_frame(
|
||||||
|
config->stage_logger,
|
||||||
|
frame_seq,
|
||||||
|
capture_end_ms - capture_start_ms,
|
||||||
|
decode_end_ms - decode_start_ms,
|
||||||
|
scale_end_ms - scale_start_ms,
|
||||||
|
encode_end_ms - encode_start_ms,
|
||||||
|
send_end_ms - send_start_ms,
|
||||||
|
send_end_ms - total_start_ms,
|
||||||
|
(size_t) encoded_pkt->size,
|
||||||
|
out_segs_after_send >= out_segs_before_send ? out_segs_after_send - out_segs_before_send : 0U,
|
||||||
|
video_sender_backlog_segments(&transport_after_send),
|
||||||
|
transport_after_send.window_pressure_pct,
|
||||||
|
transport_after_send.srtt_ms
|
||||||
|
);
|
||||||
|
}
|
||||||
if (config->enable_timing_logs) {
|
if (config->enable_timing_logs) {
|
||||||
video_pipeline_print_timing_row(
|
video_pipeline_print_timing_row(
|
||||||
frame_number,
|
frame_number,
|
||||||
@@ -864,6 +1390,9 @@ cleanup:
|
|||||||
pthread_mutex_lock(&stats->mutex);
|
pthread_mutex_lock(&stats->mutex);
|
||||||
stats->connected = 0;
|
stats->connected = 0;
|
||||||
pthread_mutex_unlock(&stats->mutex);
|
pthread_mutex_unlock(&stats->mutex);
|
||||||
|
if (gps_buffer_started) {
|
||||||
|
gps_buffer_cleanup();
|
||||||
|
}
|
||||||
if (fd >= 0) {
|
if (fd >= 0) {
|
||||||
(void) ioctl(fd, VIDIOC_STREAMOFF, &type);
|
(void) ioctl(fd, VIDIOC_STREAMOFF, &type);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -161,6 +161,13 @@ static void video_pipeline_set_errno_error(video_pipeline_stats_t *stats, const
|
|||||||
video_pipeline_set_error(stats, buffer);
|
video_pipeline_set_error(stats, buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Invokes the optional progress callback stored in the pipeline configuration.
// Does nothing when config is NULL or config->progress_callback is NULL; otherwise
// calls config->progress_callback with config->progress_context as its only argument.
static void video_pipeline_report_progress(const video_pipeline_config_t *config) {
|
||||||
|
// Guard: the callback is optional, so a missing config or callback is a silent no-op.
if (config == NULL || config->progress_callback == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
config->progress_callback(config->progress_context);
|
||||||
|
}
|
||||||
|
|
||||||
void video_pipeline_config_init(video_pipeline_config_t *config) {
|
void video_pipeline_config_init(video_pipeline_config_t *config) {
|
||||||
if (config == NULL) {
|
if (config == NULL) {
|
||||||
return;
|
return;
|
||||||
@@ -179,6 +186,8 @@ void video_pipeline_config_init(video_pipeline_config_t *config) {
|
|||||||
config->output_height = VIDEO_OUTPUT_HEIGHT_DEFAULT;
|
config->output_height = VIDEO_OUTPUT_HEIGHT_DEFAULT;
|
||||||
config->max_frames = 0;
|
config->max_frames = 0;
|
||||||
config->enable_timing_logs = 0;
|
config->enable_timing_logs = 0;
|
||||||
|
config->stats_logger = NULL;
|
||||||
|
config->stats_interval_ms = 1000;
|
||||||
}
|
}
|
||||||
|
|
||||||
void video_pipeline_config_load_env(video_pipeline_config_t *config) {
|
void video_pipeline_config_load_env(video_pipeline_config_t *config) {
|
||||||
@@ -196,6 +205,7 @@ void video_pipeline_config_load_env(video_pipeline_config_t *config) {
|
|||||||
config->max_frames = atoi(getenv("OMNI_VIDEO_MAX_FRAMES"));
|
config->max_frames = atoi(getenv("OMNI_VIDEO_MAX_FRAMES"));
|
||||||
}
|
}
|
||||||
config->enable_timing_logs = env_flag_or_default("OMNI_VIDEO_DEBUG_TIMING", config->enable_timing_logs);
|
config->enable_timing_logs = env_flag_or_default("OMNI_VIDEO_DEBUG_TIMING", config->enable_timing_logs);
|
||||||
|
config->stats_interval_ms = env_int_or_default("BLITZ_KCP_STATS_INTERVAL_MS", config->stats_interval_ms);
|
||||||
}
|
}
|
||||||
|
|
||||||
int video_pipeline_stats_init(video_pipeline_stats_t *stats) {
|
int video_pipeline_stats_init(video_pipeline_stats_t *stats) {
|
||||||
@@ -557,8 +567,8 @@ static int video_sender_init(video_sender_t *sender, const video_pipeline_config
|
|||||||
&options,
|
&options,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
config->stats_logger,
|
||||||
KCP_DEFAULT_STATS_INTERVAL_MS
|
config->stats_interval_ms
|
||||||
);
|
);
|
||||||
if (sender->client == NULL) {
|
if (sender->client == NULL) {
|
||||||
return -1;
|
return -1;
|
||||||
@@ -566,6 +576,32 @@ static int video_sender_init(video_sender_t *sender, const video_pipeline_config
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Receives and discards messages from sender->client until the receive call stops
// returning 0.
//
// Each iteration initialises a message, calls kcp_client_receive_timed() on the
// sender's client, and clears the message again regardless of the outcome.
//
// Returns 0 when the receive call returns 1, and -1 when it returns any other
// non-zero value. Returns -1 with errno set to EINVAL when sender or
// sender->client is NULL.
// NOTE(review): a receive result of 1 presumably means "timed out, nothing left to
// read" and the trailing argument 1 is presumably a timeout in milliseconds --
// confirm against the kcp_client_receive_timed() declaration.
static int video_sender_drain_pending_messages(video_sender_t *sender) {
|
||||||
|
if (sender == NULL || sender->client == NULL) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
message_t msg;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
protocol_message_init(&msg);
|
||||||
|
rc = kcp_client_receive_timed(sender->client, &msg, 1);
|
||||||
|
// Result 1 ends the drain successfully (presumably nothing left to read -- confirm).
if (rc == 1) {
|
||||||
|
protocol_message_clear(&msg);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// Any other non-zero result is reported to the caller as a failure.
if (rc != 0) {
|
||||||
|
protocol_message_clear(&msg);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drain unread server errors so an offline receiver cannot back up the reverse KCP stream.
|
||||||
|
protocol_message_clear(&msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int video_sender_send_packet(video_sender_t *sender, const AVPacket *encoded_pkt, uint64_t timestamp) {
|
static int video_sender_send_packet(video_sender_t *sender, const AVPacket *encoded_pkt, uint64_t timestamp) {
|
||||||
uint8_t *payload;
|
uint8_t *payload;
|
||||||
size_t payload_len;
|
size_t payload_len;
|
||||||
@@ -585,6 +621,10 @@ static int video_sender_send_packet(video_sender_t *sender, const AVPacket *enco
|
|||||||
memcpy(payload, encoded_pkt->data, (size_t) encoded_pkt->size);
|
memcpy(payload, encoded_pkt->data, (size_t) encoded_pkt->size);
|
||||||
memcpy(payload + encoded_pkt->size, &timestamp, sizeof(timestamp));
|
memcpy(payload + encoded_pkt->size, &timestamp, sizeof(timestamp));
|
||||||
rc = kcp_client_send_binary(sender->client, sender->target_peer, payload, payload_len);
|
rc = kcp_client_send_binary(sender->client, sender->target_peer, payload, payload_len);
|
||||||
|
if (rc != 0) {
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
rc = video_sender_drain_pending_messages(sender);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -727,6 +767,8 @@ int video_pipeline_run(const video_pipeline_config_t *config, video_pipeline_sta
|
|||||||
double send_end_ms = 0.0;
|
double send_end_ms = 0.0;
|
||||||
int frame_number = frame_index + 1;
|
int frame_number = frame_index + 1;
|
||||||
|
|
||||||
|
video_pipeline_report_progress(config);
|
||||||
|
|
||||||
if (config->max_frames > 0 && frame_index >= config->max_frames) {
|
if (config->max_frames > 0 && frame_index >= config->max_frames) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user