feat: 日志增强功能

This commit is contained in:
2026-04-14 20:52:41 +08:00
parent 579e67a3db
commit e895cdc9de
35 changed files with 1324 additions and 21 deletions

View File

@@ -14,6 +14,7 @@
#include "cJSON.h" #include "cJSON.h"
#include "control_protocol.h" #include "control_protocol.h"
#include "latencylog.h"
#include "protocol.h" #include "protocol.h"
#include "video_pipeline.h" #include "video_pipeline.h"
@@ -26,6 +27,8 @@
#define DEFAULT_VIDEO_THREAD_FAULT_FILE "fault-injection-bside-video-thread-stall" #define DEFAULT_VIDEO_THREAD_FAULT_FILE "fault-injection-bside-video-thread-stall"
#define DEFAULT_CONTROL_THREAD_FAULT_FILE "fault-injection-bside-control-thread-stall" #define DEFAULT_CONTROL_THREAD_FAULT_FILE "fault-injection-bside-control-thread-stall"
#define DEFAULT_THREAD_HEARTBEAT_TIMEOUT_SEC 15 #define DEFAULT_THREAD_HEARTBEAT_TIMEOUT_SEC 15
#define DEFAULT_KCP_STATS_INTERVAL_MS 1000
#define DEFAULT_CONTROL_LATENCY_SAMPLE_MOD 100
#define EXIT_CODE_VIDEO_THREAD_STALLED 101 #define EXIT_CODE_VIDEO_THREAD_STALLED 101
#define EXIT_CODE_CONTROL_THREAD_STALLED 102 #define EXIT_CODE_CONTROL_THREAD_STALLED 102
@@ -65,11 +68,15 @@ typedef struct daemon_state {
int control_server_idle_reconnect_ms; int control_server_idle_reconnect_ms;
const char *runtime_dir; const char *runtime_dir;
int heartbeat_timeout_sec; int heartbeat_timeout_sec;
int stats_interval_ms;
uint64_t control_latency_sample_mod;
char status_file_path[512]; char status_file_path[512];
char video_thread_fault_file[512]; char video_thread_fault_file[512];
char control_thread_fault_file[512]; char control_thread_fault_file[512];
atomic_long video_thread_heartbeat_epoch_sec; atomic_long video_thread_heartbeat_epoch_sec;
atomic_long control_thread_heartbeat_epoch_sec; atomic_long control_thread_heartbeat_epoch_sec;
kcp_session_stats_logger_t *stats_logger;
latency_logger_t *control_latency_logger;
unix_dgram_client_t unix_client; unix_dgram_client_t unix_client;
control_bridge_stats_t control_stats; control_bridge_stats_t control_stats;
} daemon_state_t; } daemon_state_t;
@@ -127,6 +134,21 @@ static int env_int_or_default(const char *name, int fallback) {
return parsed; return parsed;
} }
/*
 * Read a strictly positive unsigned 64-bit decimal value from the environment
 * variable `name`.
 *
 * Returns `fallback` when the variable is unset or empty, is not entirely a
 * base-10 number, carries a minus sign, does not fit in the result type, or
 * parses to zero.
 */
static uint64_t env_u64_or_default(const char *name, uint64_t fallback) {
    const char *value = getenv(name);
    const char *cursor = NULL;
    unsigned long long parsed = 0ULL;
    char *endptr = NULL;

    if (value == NULL || value[0] == '\0') {
        return fallback;
    }

    /* strtoull() reports overflow only through errno, so clear it first. */
    errno = 0;
    parsed = strtoull(value, &endptr, 10);
    if (endptr == value || *endptr != '\0' || errno == ERANGE || parsed == 0ULL) {
        return fallback;
    }

    /*
     * strtoull() accepts a leading '-' and returns the negated value converted
     * to unsigned (e.g. "-1" becomes ULLONG_MAX). Base-10 digits never contain
     * '-', so any '-' in the consumed text is such a sign.
     */
    for (cursor = value; cursor < endptr; ++cursor) {
        if (*cursor == '-') {
            return fallback;
        }
    }

    return (uint64_t) parsed;
}
static int64_t realtime_epoch_ms(void) { static int64_t realtime_epoch_ms(void) {
struct timespec ts; struct timespec ts;
@@ -145,6 +167,19 @@ static void update_thread_heartbeat(atomic_long *heartbeat) {
atomic_store(heartbeat, realtime_epoch_sec()); atomic_store(heartbeat, realtime_epoch_sec());
} }
/*
 * Decide whether latency events should be emitted for `msg`.
 *
 * Returns 0 when `state`, its control latency logger, or `msg` is NULL.
 * Otherwise returns 1 for every message when the sample modulus is 0 or 1,
 * and for larger moduli only when `msg->id` is a multiple of the modulus.
 */
static int should_log_control_latency(const daemon_state_t *state, const message_t *msg) {
    uint64_t modulus;

    if (state == NULL || msg == NULL) {
        return 0;
    }
    if (state->control_latency_logger == NULL) {
        return 0;
    }

    modulus = state->control_latency_sample_mod;
    /* The <= 1 guard also keeps the modulo below from dividing by zero. */
    return (modulus <= 1U) ? 1 : (msg->id % modulus == 0U);
}
static void video_pipeline_heartbeat_progress(void *context) { static void video_pipeline_heartbeat_progress(void *context) {
update_thread_heartbeat((atomic_long *) context); update_thread_heartbeat((atomic_long *) context);
} }
@@ -556,8 +591,8 @@ static void *control_thread_main(void *arg) {
&options, &options,
NULL, NULL,
NULL, NULL,
NULL, state->stats_logger,
KCP_DEFAULT_STATS_INTERVAL_MS state->stats_interval_ms
); );
if (client == NULL) { if (client == NULL) {
control_bridge_set_errno_error(&state->control_stats, "failed to connect control session"); control_bridge_set_errno_error(&state->control_stats, "failed to connect control session");
@@ -692,6 +727,11 @@ static void *control_thread_main(void *arg) {
continue; continue;
} }
if (should_log_control_latency(state, &msg)) {
latencylog_log_message_event(state->control_latency_logger, OMNI_NODE_ROLE_PEER, state->control_peer_id, EVENT_B_APP_RECV, &msg);
latencylog_log_message_event(state->control_latency_logger, OMNI_NODE_ROLE_PEER, state->control_peer_id, EVENT_B_PERSIST_BEGIN, &msg);
}
if (unix_dgram_client_send(&state->unix_client, msg.body, msg.body_len) != 0) { if (unix_dgram_client_send(&state->unix_client, msg.body, msg.body_len) != 0) {
int send_errno = errno; int send_errno = errno;
int recovered = 0; int recovered = 0;
@@ -708,6 +748,9 @@ static void *control_thread_main(void *arg) {
state->control_stats.server_idle_ms = client_state.server_idle_ms; state->control_stats.server_idle_ms = client_state.server_idle_ms;
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport); kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex); pthread_mutex_unlock(&state->control_stats.mutex);
if (should_log_control_latency(state, &msg)) {
latencylog_log_message_event(state->control_latency_logger, OMNI_NODE_ROLE_PEER, state->control_peer_id, EVENT_B_PERSIST_END, &msg);
}
protocol_message_clear(&msg); protocol_message_clear(&msg);
continue; continue;
} }
@@ -728,6 +771,9 @@ static void *control_thread_main(void *arg) {
state->control_stats.server_idle_ms = client_state.server_idle_ms; state->control_stats.server_idle_ms = client_state.server_idle_ms;
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport); kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex); pthread_mutex_unlock(&state->control_stats.mutex);
if (should_log_control_latency(state, &msg)) {
latencylog_log_message_event(state->control_latency_logger, OMNI_NODE_ROLE_PEER, state->control_peer_id, EVENT_B_PERSIST_END, &msg);
}
protocol_message_clear(&msg); protocol_message_clear(&msg);
} }
@@ -799,8 +845,12 @@ int main(void) {
"BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC", "BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC",
DEFAULT_THREAD_HEARTBEAT_TIMEOUT_SEC DEFAULT_THREAD_HEARTBEAT_TIMEOUT_SEC
); );
state.stats_interval_ms = env_int_or_default("BLITZ_KCP_STATS_INTERVAL_MS", DEFAULT_KCP_STATS_INTERVAL_MS);
state.control_latency_sample_mod = env_u64_or_default("BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD", DEFAULT_CONTROL_LATENCY_SAMPLE_MOD);
state.video_config.progress_callback = video_pipeline_heartbeat_progress; state.video_config.progress_callback = video_pipeline_heartbeat_progress;
state.video_config.progress_context = &state.video_thread_heartbeat_epoch_sec; state.video_config.progress_context = &state.video_thread_heartbeat_epoch_sec;
state.video_config.stats_logger = NULL;
state.video_config.stats_interval_ms = state.stats_interval_ms;
state.control_server_idle_reconnect_ms = env_int_or_default( state.control_server_idle_reconnect_ms = env_int_or_default(
"OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS", "OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS",
CONTROL_DEFAULT_SERVER_IDLE_RECONNECT_MS CONTROL_DEFAULT_SERVER_IDLE_RECONNECT_MS
@@ -860,11 +910,34 @@ int main(void) {
return 1; return 1;
} }
{
const char *stats_log_path = getenv("BLITZ_KCP_STATS_LOG_PATH");
const char *latency_log_path = getenv("BLITZ_CONTROL_LATENCY_LOG_PATH");
int latency_enabled = env_int_or_default("BLITZ_CONTROL_LATENCY_LOG_ENABLED", 1);
if (stats_log_path != NULL && stats_log_path[0] != '\0') {
state.stats_logger = kcp_session_stats_open_jsonl(stats_log_path);
if (state.stats_logger == NULL) {
fprintf(stderr, "[b_side_omnid] warning: failed to open KCP stats log %s\n", stats_log_path);
}
}
if (latency_enabled && latency_log_path != NULL && latency_log_path[0] != '\0') {
state.control_latency_logger = latencylog_open_jsonl(latency_log_path);
if (state.control_latency_logger == NULL) {
fprintf(stderr, "[b_side_omnid] warning: failed to open control latency log %s\n", latency_log_path);
}
}
state.video_config.stats_logger = state.stats_logger;
state.video_config.stats_interval_ms = state.stats_interval_ms;
}
if (pthread_create(&video_thread, NULL, video_thread_main, &state) != 0) { if (pthread_create(&video_thread, NULL, video_thread_main, &state) != 0) {
perror("pthread_create(video_thread)"); perror("pthread_create(video_thread)");
unix_dgram_client_close(&state.unix_client); unix_dgram_client_close(&state.unix_client);
control_bridge_stats_destroy(&state.control_stats); control_bridge_stats_destroy(&state.control_stats);
video_pipeline_stats_destroy(&state.video_stats); video_pipeline_stats_destroy(&state.video_stats);
latencylog_close(state.control_latency_logger);
kcp_session_stats_close(state.stats_logger);
return 1; return 1;
} }
if (pthread_create(&control_thread, NULL, control_thread_main, &state) != 0) { if (pthread_create(&control_thread, NULL, control_thread_main, &state) != 0) {
@@ -874,6 +947,8 @@ int main(void) {
unix_dgram_client_close(&state.unix_client); unix_dgram_client_close(&state.unix_client);
control_bridge_stats_destroy(&state.control_stats); control_bridge_stats_destroy(&state.control_stats);
video_pipeline_stats_destroy(&state.video_stats); video_pipeline_stats_destroy(&state.video_stats);
latencylog_close(state.control_latency_logger);
kcp_session_stats_close(state.stats_logger);
return 1; return 1;
} }
@@ -891,5 +966,7 @@ int main(void) {
unix_dgram_client_close(&state.unix_client); unix_dgram_client_close(&state.unix_client);
control_bridge_stats_destroy(&state.control_stats); control_bridge_stats_destroy(&state.control_stats);
video_pipeline_stats_destroy(&state.video_stats); video_pipeline_stats_destroy(&state.video_stats);
latencylog_close(state.control_latency_logger);
kcp_session_stats_close(state.stats_logger);
return 0; return 0;
} }

View File

@@ -3,6 +3,7 @@
#include <errno.h> #include <errno.h>
#include <inttypes.h> #include <inttypes.h>
#include <limits.h>
#include <pthread.h> #include <pthread.h>
#include <stdarg.h> #include <stdarg.h>
#include <stdbool.h> #include <stdbool.h>
@@ -31,6 +32,15 @@
typedef struct omni_file_logger { typedef struct omni_file_logger {
FILE *file; FILE *file;
pthread_mutex_t mutex; pthread_mutex_t mutex;
char path[PATH_MAX];
size_t current_bytes;
size_t buffered_bytes;
size_t flush_bytes;
size_t max_bytes;
int flush_interval_ms;
int max_files;
int immediate_flush;
uint64_t last_flush_monotonic_ms;
} omni_file_logger_t; } omni_file_logger_t;
int64_t omni_now_unix_nano(void); int64_t omni_now_unix_nano(void);
@@ -61,6 +71,7 @@ double omni_duration_ms_to_ns(double ms);
const char *omni_path_base_name(const char *path); const char *omni_path_base_name(const char *path);
void omni_file_logger_init(omni_file_logger_t *logger, FILE *file); void omni_file_logger_init(omni_file_logger_t *logger, FILE *file);
void omni_file_logger_init_path(omni_file_logger_t *logger, FILE *file, const char *path, int immediate_flush);
void omni_file_logger_destroy(omni_file_logger_t *logger); void omni_file_logger_destroy(omni_file_logger_t *logger);
int omni_file_logger_write_line(omni_file_logger_t *logger, const char *line); int omni_file_logger_write_line(omni_file_logger_t *logger, const char *line);

View File

@@ -43,6 +43,8 @@ typedef struct video_pipeline_config {
int hard_backpressure_hold_ms; int hard_backpressure_hold_ms;
int server_idle_reconnect_ms; int server_idle_reconnect_ms;
int frame_stall_reconnect_ms; int frame_stall_reconnect_ms;
kcp_session_stats_logger_t *stats_logger;
int stats_interval_ms;
video_pipeline_progress_fn progress_callback; video_pipeline_progress_fn progress_callback;
void *progress_context; void *progress_context;
} video_pipeline_config_t; } video_pipeline_config_t;

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/common.sh"
STEP="5g-link-logger"
# Print the address the link logger should probe and return 0, or return 1
# when none is configured.
#
# BLITZ_TIME_SERVER_IP wins when it is non-empty; otherwise the first
# non-empty entry of BLITZ_5G_ROUTE_TARGETS (comma- or whitespace-separated)
# is used.
resolve_target_ip() {
  local candidate=""
  local -a candidates=()

  if [[ -n "${BLITZ_TIME_SERVER_IP:-}" ]]; then
    printf '%s\n' "${BLITZ_TIME_SERVER_IP}"
    return 0
  fi

  # Split into an array instead of relying on an unquoted expansion, so that
  # entries are never glob-expanded and an unset variable is tolerated under
  # `set -u`. `read -d ''` hits end-of-input and returns non-zero, hence `|| true`.
  IFS=$', \t\n' read -r -d '' -a candidates <<< "${BLITZ_5G_ROUTE_TARGETS:-}" || true

  # The `${arr[@]+...}` form keeps an empty array from tripping `set -u` on
  # bash releases older than 4.4.
  for candidate in ${candidates[@]+"${candidates[@]}"}; do
    if [[ -n "${candidate}" ]]; then
      printf '%s\n' "${candidate}"
      return 0
    fi
  done

  return 1
}
# Print one compact JSON object describing the current state of the link.
#
# $1  network interface name (may be empty: interface counters are skipped)
# $2  probe target address   (may be empty: route lookup and ping are skipped)
#
# The sample always contains every key; probes that cannot run leave their
# fields at the defaults set in `payload` below.
emit_sample_json() {
  local interface_name="${1:-}"
  local target_ip="${2:-}"

  python3 - "${interface_name}" "${target_ip}" <<'PY'
import json
import subprocess
import sys
import time

interface_name = sys.argv[1]
target_ip = sys.argv[2]
payload = {
    "ts_unix_ms": time.time_ns() // 1_000_000,
    "interface": interface_name,
    "target_ip": target_ip,
    "link_present": False,
    "route_output": "",
    "route_ok": False,
    "probe_ok": False,
    "ping_rtt_ms": None,
    "rx_bytes": 0,
    "tx_bytes": 0,
    "rx_packets": 0,
    "tx_packets": 0,
    "rx_errors": 0,
    "tx_errors": 0,
    "rx_drops": 0,
    "tx_drops": 0,
}

if interface_name:
    try:
        output = subprocess.check_output(
            ["ip", "-j", "-s", "link", "show", "dev", interface_name],
            text=True,
            stderr=subprocess.DEVNULL,
        )
        stats = json.loads(output)
        if stats:
            item = stats[0]
            payload["link_present"] = True
            rx = item.get("stats64", {}).get("rx", {})
            tx = item.get("stats64", {}).get("tx", {})
            # Fall back to the "stats" object when "stats64" carries nothing.
            if not rx and not tx:
                rx = item.get("stats", {}).get("rx", {})
                tx = item.get("stats", {}).get("tx", {})
            payload["rx_bytes"] = int(rx.get("bytes") or 0)
            payload["tx_bytes"] = int(tx.get("bytes") or 0)
            payload["rx_packets"] = int(rx.get("packets") or 0)
            payload["tx_packets"] = int(tx.get("packets") or 0)
            payload["rx_errors"] = int(rx.get("errors") or 0)
            payload["tx_errors"] = int(tx.get("errors") or 0)
            payload["rx_drops"] = int(rx.get("dropped") or 0)
            payload["tx_drops"] = int(tx.get("dropped") or 0)
    except Exception:
        # Best effort: keep the default counters when the link cannot be read.
        pass

if target_ip:
    try:
        route = subprocess.check_output(
            ["ip", "route", "get", target_ip],
            text=True,
            stderr=subprocess.STDOUT,
        ).strip()
        payload["route_output"] = route.splitlines()[0] if route else ""
        payload["route_ok"] = bool(payload["route_output"]) and (
            not interface_name or f" dev {interface_name}" in payload["route_output"]
        )
    except Exception as exc:
        payload["route_output"] = str(exc)

    ping_cmd = ["ping", "-c", "1", "-W", "2", target_ip]
    if interface_name:
        ping_cmd[1:1] = ["-I", interface_name]
    # Guard the probe like the other two: a missing or unrunnable `ping`
    # must produce a sample with probe_ok=false, not abort the sampler.
    try:
        ping = subprocess.run(ping_cmd, capture_output=True, text=True)
    except Exception:
        ping = None
    if ping is not None:
        payload["probe_ok"] = ping.returncode == 0
        output = (ping.stdout or "") + "\n" + (ping.stderr or "")
        for token in output.replace("\n", " ").split():
            if token.startswith("time="):
                value = token.split("=", 1)[1].rstrip("ms")
                try:
                    payload["ping_rtt_ms"] = float(value)
                except ValueError:
                    pass
                break

print(json.dumps(payload, separators=(",", ":"), ensure_ascii=False))
PY
}
# Main sequence of the 5G link logger: resolve the output path and probe
# target once, then append one JSON sample per interval forever.

# In boot mode (OMNI_BOOT_MODE=1) load the boot environment and require a run context.
if [[ "${OMNI_BOOT_MODE:-0}" == "1" ]]; then
blitz_load_boot_env
blitz_require_run_context
fi
# Otherwise, when BLITZ_RUN_DIR is not set but a run-context file exists,
# try to load it; a failure to load is tolerated.
if [[ -z "${BLITZ_RUN_DIR:-}" && -f "${BLITZ_RUN_CONTEXT_FILE:-}" ]]; then
blitz_load_run_context_env || true
fi
blitz_ensure_instance_id
# An explicit BLITZ_5G_LINK_LOG_PATH wins; the default lives under BLITZ_RUN_DIR.
# NOTE(review): the default expansion reads BLITZ_RUN_DIR unguarded, so under
# `set -u` this aborts if neither variable ended up set — confirm that is intended.
export BLITZ_5G_LINK_LOG_PATH="${BLITZ_5G_LINK_LOG_PATH:-${BLITZ_RUN_DIR}/b-5g-link-quality.${BLITZ_INSTANCE_ID}.jsonl}"
# The probe target is resolved once; an empty result is accepted.
target_ip="$(resolve_target_ip || true)"
blitz_log "${STEP}" "start" "start" "path=${BLITZ_5G_LINK_LOG_PATH} interval_sec=${BLITZ_5G_LINK_LOG_INTERVAL_SEC}" 0
while true; do
# The interface is re-resolved on every iteration; an empty result is accepted.
interface_name="$(blitz_resolve_5g_interface || true)"
# NOTE(review): under `set -e` a failing sampler or append ends the script —
# presumably the service manager restarts it; confirm.
line="$(emit_sample_json "${interface_name}" "${target_ip}")"
blitz_jsonl_append_line "${BLITZ_5G_LINK_LOG_PATH}" "${line}"
sleep "${BLITZ_5G_LINK_LOG_INTERVAL_SEC}"
done

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/common.sh"
STEP="incident-launch"
incident_id=""
args=()
timeout_bin=""

# Extract --incident-id from the command line; every other argument is
# collected verbatim and forwarded to the capture script.
while (($# > 0)); do
  case "$1" in
    --incident-id)
      # Fail loudly instead of letting `shift 2` abort the script silently
      # under `set -e` when the value is missing. The message goes to stderr
      # because stdout carries the incident id for callers.
      if (($# < 2)); then
        printf '%s: missing value for --incident-id\n' "${STEP}" >&2
        exit 2
      fi
      incident_id="$2"
      shift 2
      ;;
    *)
      args+=("$1")
      shift
      ;;
  esac
done

blitz_load_boot_env
blitz_require_root "${STEP}"
blitz_require_command systemd-run "${STEP}"
blitz_require_command timeout "${STEP}"
timeout_bin="$(command -v timeout)"

# An absent or empty --incident-id gets a freshly generated id.
if [[ -z "${incident_id}" ]]; then
  incident_id="$(blitz_new_incident_id)"
fi

# Replace every character outside [A-Za-z0-9_.-] so the id is usable in a unit name.
unit_name="blitz-incident-${incident_id//[^A-Za-z0-9_.-]/-}"

# Run the capture script as a transient oneshot unit, bounded by an overall
# timeout, with its output appended to the boot log file.
# NOTE(review): without --no-block, systemd-run presumably waits until the
# oneshot unit has finished — confirm callers expect this to be synchronous.
# `${args[@]+...}` keeps an empty array from tripping `set -u` on bash < 4.4.
systemd-run \
  --quiet \
  --collect \
  --unit "${unit_name}" \
  --property=Type=oneshot \
  --property="StandardOutput=append:${BLITZ_LOG_FILE}" \
  --property="StandardError=append:${BLITZ_LOG_FILE}" \
  "${timeout_bin}" "${BLITZ_INCIDENT_TOTAL_TIMEOUT_SEC}s" \
  /bin/bash "${SCRIPT_DIR}/blitz-incident-capture.sh" \
  --incident-id "${incident_id}" \
  ${args[@]+"${args[@]}"}

# Print the incident id that was used.
printf '%s\n' "${incident_id}"

View File

@@ -0,0 +1,131 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/common.sh"
STEP="incident-capture"
incident_id=""
incident_source=""
incident_reason=""
incident_unit=""
incident_result=""
incident_exit_status=""
# Run a diagnostic command and store its combined stdout/stderr in a file.
#
# $1   file that receives the output
# $2+  command and arguments to execute
#
# When `timeout` is available the command is limited to
# BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC seconds. The command's exit status is
# deliberately ignored; the function always returns 0.
run_capture() {
  local destination="$1"
  shift

  if ! command -v timeout >/dev/null 2>&1; then
    "$@" > "${destination}" 2>&1 || true
    return 0
  fi

  timeout "${BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC}s" "$@" > "${destination}" 2>&1 || true
}
# Parse "--option value" pairs into the incident_* variables.
# Unknown options and options missing their value are logged and exit with status 2.
while (($# > 0)); do
  case "$1" in
    --incident-id|--source|--reason|--unit|--result|--exit-status)
      # Report a missing value explicitly; otherwise `shift 2` would fail and
      # `set -e` would end the script without any message.
      if (($# < 2)); then
        blitz_log "${STEP}" "parse-arg" "failure" "missing value for argument: $1" 2
        exit 2
      fi
      ;;
    *)
      blitz_log "${STEP}" "parse-arg" "failure" "unknown argument: $1" 2
      exit 2
      ;;
  esac

  case "$1" in
    --incident-id) incident_id="$2" ;;
    --source) incident_source="$2" ;;
    --reason) incident_reason="$2" ;;
    --unit) incident_unit="$2" ;;
    --result) incident_result="$2" ;;
    --exit-status) incident_exit_status="$2" ;;
  esac
  shift 2
done
# A unit that reports "success" needs no incident record.
if [[ "${incident_result}" == "success" ]]; then
  exit 0
fi

# Load the environment and prepare the directories.
# A missing run context is tolerated.
blitz_load_boot_env
blitz_load_run_context_env || true
blitz_prepare_runtime_dir
blitz_prepare_run_root

# Generate an id when the caller did not supply one.
[[ -n "${incident_id}" ]] || incident_id="$(blitz_new_incident_id)"

# All artifacts of this incident go into one directory under the run root.
incident_dir="${BLITZ_RUN_ROOT}/incidents/${incident_id}"
mkdir -p "${incident_dir}"
# Write incident.json with the incident metadata. Ten positional arguments are
# passed to the embedded Python program, in the order it unpacks them.
python3 - "${incident_dir}/incident.json" "${incident_id}" "${BLITZ_RUN_ID:-}" "${incident_source}" "${incident_reason}" "${incident_unit}" "${incident_result}" "${incident_exit_status}" "${BLITZ_RUN_DIR:-}" "${HOSTNAME:-$(hostname)}" <<'PY'
import json
import sys
import time

# Ten values are unpacked, so the slice must end at index 11 (exclusive);
# sys.argv[1:10] yields only nine and raises ValueError.
path, incident_id, run_id, source, reason, unit, result, exit_status, run_dir, hostname = sys.argv[1:11]
payload = {
    "incident_id": incident_id,
    "run_id": run_id,
    "source": source,
    "fault_reason": reason,
    "unit": unit,
    "service_result": result,
    "exit_status": exit_status,
    "run_dir": run_dir,
    "hostname": hostname,
    "captured_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
}
with open(path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, ensure_ascii=False, indent=2, sort_keys=True)
PY
# Collect diagnostic artifacts into ${incident_dir}. Every source is optional:
# missing files are skipped and run_capture ignores command failures.

# Copy whichever component status files currently exist.
for status_file in \
"${BLITZ_RUNTIME_DIR}/watchdog.status.json" \
"${BLITZ_RUNTIME_DIR}/b-side-omnid.status.json" \
"${BLITZ_RUNTIME_DIR}/ros-receiver.status.json"
do
if [[ -f "${status_file}" ]]; then
cp -f "${status_file}" "${incident_dir}/$(basename "${status_file}")"
fi
done
# Keep the last 400 lines of the boot log.
if [[ -f "${BLITZ_LOG_FILE}" ]]; then
tail -n 400 "${BLITZ_LOG_FILE}" > "${incident_dir}/startup.log.tail"
fi
# Service state and the last five minutes of journal for the blitz units.
run_capture "${incident_dir}/systemctl-status.txt" \
systemctl status blitz-robot.target blitz-run-context.service blitz-5g-dial.service blitz-5g-link-logger.service blitz-ros-receiver.service blitz-b-side-omnid.service blitz-watchdog.service
run_capture "${incident_dir}/journal.txt" \
journalctl --no-pager --since "5 minutes ago" -u blitz-run-context.service -u blitz-5g-dial.service -u blitz-5g-link-logger.service -u blitz-ros-receiver.service -u blitz-b-side-omnid.service -u blitz-watchdog.service
# Network addressing, routing and socket snapshots.
run_capture "${incident_dir}/ip-addr.txt" ip addr
run_capture "${incident_dir}/ip-route.txt" ip route
run_capture "${incident_dir}/ss-uapn.txt" ss -uapn
run_capture "${incident_dir}/ss-xlp.txt" ss -xlp
# Copy the file named by BLITZ_5G_INFO_JSON when it exists.
if [[ -f "${BLITZ_5G_INFO_JSON:-}" ]]; then
cp -f "${BLITZ_5G_INFO_JSON}" "${incident_dir}/$(basename "${BLITZ_5G_INFO_JSON}")"
fi
# Keep the last 200 lines of every top-level *.jsonl file in the run directory.
# NUL-delimited names keep unusual file names intact.
if [[ -n "${BLITZ_RUN_DIR:-}" && -d "${BLITZ_RUN_DIR}" ]]; then
while IFS= read -r -d '' jsonl; do
tail -n 200 "${jsonl}" > "${incident_dir}/tail-$(basename "${jsonl}")"
done < <(find "${BLITZ_RUN_DIR}" -maxdepth 1 -type f -name '*.jsonl' -print0 2>/dev/null)
fi
blitz_log "${STEP}" "complete" "success" "incident_id=${incident_id} path=${incident_dir}" 0

View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/common.sh"
STEP="run-context"
# Initialise the run context and log the resulting run id and directory.
# Requires root and python3; the blitz_* helpers come from common.sh.
blitz_load_boot_env
blitz_require_root "${STEP}"
blitz_require_command python3 "${STEP}"
# NOTE(review): presumably this is what sets BLITZ_RUN_ID and BLITZ_RUN_DIR,
# which the log line below reads — confirm in common.sh.
blitz_init_run_context
blitz_log "${STEP}" "complete" "success" "run_id=${BLITZ_RUN_ID} run_dir=${BLITZ_RUN_DIR}" 0

View File

@@ -12,6 +12,10 @@ B_SIDE_STATUS_FILE=""
ROS_STATUS_FILE="" ROS_STATUS_FILE=""
WATCHDOG_STATUS_FILE="" WATCHDOG_STATUS_FILE=""
NETWORK_FAULT_FILE="" NETWORK_FAULT_FILE=""
WATCHDOG_EVENT_LOG=""
WATCHDOG_SAMPLE_LOG=""
WATCHDOG_EVENT_LOG_FAILURE_REPORTED=0
WATCHDOG_SAMPLE_LOG_FAILURE_REPORTED=0
CAMERA_MISSING_PREV=0 CAMERA_MISSING_PREV=0
CAMERA_RECOVERY_STABLE_COUNT=0 CAMERA_RECOVERY_STABLE_COUNT=0
NETWORK_FAIL_COUNT=0 NETWORK_FAIL_COUNT=0
@@ -28,6 +32,8 @@ GPS_LAST_CHECK_SEC=0
GPS_DEVICE_PRESENT_PREV=-1 GPS_DEVICE_PRESENT_PREV=-1
GPS_DEVICE_PRESENT_STATE=1 GPS_DEVICE_PRESENT_STATE=1
GPS_STACK_ACTIVE_STATE=1 GPS_STACK_ACTIVE_STATE=1
LAST_REPORTED_FAULT_REASON=""
LAST_REPORTED_RECOVERY_STATE=""
declare -A TARGETED_RESTART_WINDOW_START=() declare -A TARGETED_RESTART_WINDOW_START=()
declare -A TARGETED_RESTART_WINDOW_COUNT=() declare -A TARGETED_RESTART_WINDOW_COUNT=()
@@ -236,6 +242,116 @@ EOF
mv -f "${tmp_file}" "${WATCHDOG_STATUS_FILE}" mv -f "${tmp_file}" "${WATCHDOG_STATUS_FILE}"
} }
# Print one compact JSON record describing a watchdog event or sample.
#
# Positional arguments:
#   $1  record_type          $2  action
#   $3  fault_reason         $4  recovery_state
#   $5  detail               $6  incident_id (optional; emitted as null when empty)
#   $7..$12  network_ok, camera_ok, ros_ok, bside_ok, gps_ok, gps_device_present
#            (optional, default "1"; only the literal "1" is emitted as true)
#
# The record also carries the current LAST_ACTION, LAST_ACTION_EPOCH_MS,
# NETWORK_FAIL_COUNT, targeted_restart_total and FULL_RESTART_WINDOW_COUNT.
watchdog_emit_json() {
local record_type="$1"
local action="$2"
local fault_reason="$3"
local recovery_state="$4"
local detail="$5"
local incident_id="${6:-}"
local network_ok="${7:-1}"
local camera_ok="${8:-1}"
local ros_ok="${9:-1}"
local bside_ok="${10:-1}"
local gps_ok="${11:-1}"
local gps_device_present="${12:-1}"
# Seventeen arguments are passed; the Python side unpacks them in this exact order.
python3 - "${record_type}" "${action}" "${fault_reason}" "${recovery_state}" "${detail}" "${incident_id}" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}" "${gps_ok}" "${gps_device_present}" "${LAST_ACTION}" "${LAST_ACTION_EPOCH_MS}" "${NETWORK_FAIL_COUNT}" "$(targeted_restart_total)" "${FULL_RESTART_WINDOW_COUNT}" <<'PY'
import json
import sys
import time
record_type, action, fault_reason, recovery_state, detail, incident_id, network_ok, camera_ok, ros_ok, bside_ok, gps_ok, gps_device_present, last_action, last_action_epoch_ms, network_fail_count, targeted_restart_count, full_restart_count = sys.argv[1:18]
payload = {
"ts_unix_ms": time.time_ns() // 1_000_000,
"record_type": record_type,
"action": action,
"fault_reason": fault_reason,
"recovery_state": recovery_state,
"detail": detail,
"incident_id": incident_id or None,
"network_ok": network_ok == "1",
"camera_ok": camera_ok == "1",
"ros_ok": ros_ok == "1",
"bside_ok": bside_ok == "1",
"gps_ok": gps_ok == "1",
"gps_device_present": gps_device_present == "1",
"network_fail_count": int(network_fail_count),
"targeted_restart_count": int(targeted_restart_count),
"full_restart_count": int(full_restart_count),
"last_action": last_action,
"last_action_epoch_ms": int(last_action_epoch_ms or 0),
}
print(json.dumps(payload, separators=(",", ":"), ensure_ascii=False))
PY
}
# Shared implementation of watchdog_append_event / watchdog_append_sample.
#
# $1   path of the JSONL file to append to (empty: do nothing)
# $2   action name used when a failure is logged
# $3   NAME of the global "failure already reported" flag for this log
# $4+  arguments forwarded to watchdog_emit_json
#
# Logging is best effort and always returns 0. A failure is reported through
# blitz_log once, then suppressed until a record is written successfully again.
watchdog_append_record() {
  local log_path="$1"
  local log_action="$2"
  local flag_name="$3"
  local line=""
  local failure_detail=""
  shift 3

  [[ -n "${log_path}" ]] || return 0

  if ! line="$(watchdog_emit_json "$@" 2>&1)"; then
    # stderr is folded into the capture, so the output doubles as the detail.
    failure_detail="${line}"
  elif ! blitz_jsonl_append_line "${log_path}" "${line}"; then
    failure_detail="append-failed"
  else
    # Success re-arms failure reporting.
    printf -v "${flag_name}" '%s' 0
    return 0
  fi

  # ${!flag_name} reads the global whose name was passed in.
  if (( ${!flag_name} == 0 )); then
    blitz_log "${STEP}" "${log_action}" "failure" "path=${log_path} detail=${failure_detail}" 0 || true
    printf -v "${flag_name}" '%s' 1
  fi
  return 0
}

# Append one record to WATCHDOG_EVENT_LOG; arguments are those of watchdog_emit_json.
watchdog_append_event() {
  watchdog_append_record "${WATCHDOG_EVENT_LOG}" "watchdog-event-log" \
    WATCHDOG_EVENT_LOG_FAILURE_REPORTED "$@"
}

# Append one record to WATCHDOG_SAMPLE_LOG; arguments are those of watchdog_emit_json.
watchdog_append_sample() {
  watchdog_append_record "${WATCHDOG_SAMPLE_LOG}" "watchdog-sample-log" \
    WATCHDOG_SAMPLE_LOG_FAILURE_REPORTED "$@"
}
# Emit a "state-transition" event when the (fault reason, recovery state) pair
# differs from the pair recorded by the previous call, then remember the new pair.
#
# $1  current fault reason
# $2  current recovery state
watchdog_record_state_transition() {
  local current_fault="$1"
  local current_state="$2"

  if [[ "${current_fault}" != "${LAST_REPORTED_FAULT_REASON}" || "${current_state}" != "${LAST_REPORTED_RECOVERY_STATE}" ]]; then
    watchdog_append_event "event" "state-transition" "${current_fault}" "${current_state}" "" ""
    LAST_REPORTED_FAULT_REASON="${current_fault}"
    LAST_REPORTED_RECOVERY_STATE="${current_state}"
  fi
}
# Start an incident capture on behalf of the watchdog.
#
# $1  fault reason, passed to the launcher as --reason
# $2  affected unit, passed to the launcher as --unit
#
# The launcher's stdout is passed through for callers to capture; its stderr
# is discarded and a launcher failure is ignored, so this always returns 0.
watchdog_launch_incident() {
  local reason="$1"
  local unit_name="$2"
  local -a launch_cmd=(
    "${BOOT_SCRIPT_DIR}/blitz-incident-capture-launch.sh"
    --source watchdog
    --reason "${reason}"
    --unit "${unit_name}"
    --result failure
    --exit-status 1
  )

  "${launch_cmd[@]}" 2>/dev/null || true
}
set_last_action() { set_last_action() {
LAST_ACTION="$1" LAST_ACTION="$1"
LAST_ACTION_EPOCH_MS="$(now_epoch_ms)" LAST_ACTION_EPOCH_MS="$(now_epoch_ms)"
@@ -283,6 +399,7 @@ record_full_restart() {
fi fi
if (( FULL_RESTART_WINDOW_COUNT >= 3 )); then if (( FULL_RESTART_WINDOW_COUNT >= 3 )); then
BACKOFF_UNTIL=$(( now_sec + 60 )) BACKOFF_UNTIL=$(( now_sec + 60 ))
watchdog_append_event "event" "backoff-enter" "backoff" "backoff" "full_restart_count=${FULL_RESTART_WINDOW_COUNT}" ""
fi fi
} }
@@ -290,36 +407,45 @@ restart_bside_targeted() {
local fault_key="$1" local fault_key="$1"
local reason="$2" local reason="$2"
local rc local rc
local incident_id=""
if register_targeted_restart "${fault_key}"; then if register_targeted_restart "${fault_key}"; then
blitz_log "${STEP}" "escalate-full-restart" "start" "reason=${reason}" 0 blitz_log "${STEP}" "escalate-full-restart" "start" "reason=${reason}" 0
watchdog_append_event "event" "escalate-full-restart" "${reason}-escalated" "recovering" "fault_key=${fault_key}" ""
full_restart_stack "${reason}-escalated" full_restart_stack "${reason}-escalated"
return 0 return 0
fi fi
incident_id="$(watchdog_launch_incident "${reason}" "${B_SIDE_SERVICE}")"
set_last_action "restart-bside" set_last_action "restart-bside"
RECOVERY_ACTION_TAKEN=1 RECOVERY_ACTION_TAKEN=1
blitz_log "${STEP}" "restart-bside" "start" "reason=${reason}" 0 blitz_log "${STEP}" "restart-bside" "start" "reason=${reason}" 0
watchdog_append_event "event" "restart-bside-start" "${reason}" "recovering" "fault_key=${fault_key}" "${incident_id}"
if systemctl restart "${B_SIDE_SERVICE}"; then if systemctl restart "${B_SIDE_SERVICE}"; then
blitz_log "${STEP}" "restart-bside" "success" "reason=${reason}" 0 blitz_log "${STEP}" "restart-bside" "success" "reason=${reason}" 0
watchdog_append_event "event" "restart-bside-success" "${reason}" "recovering" "fault_key=${fault_key}" "${incident_id}"
return 0 return 0
fi fi
rc=$? rc=$?
blitz_log "${STEP}" "restart-bside" "failure" "reason=${reason}" "${rc}" blitz_log "${STEP}" "restart-bside" "failure" "reason=${reason}" "${rc}"
watchdog_append_event "event" "restart-bside-failure" "${reason}" "recovering" "fault_key=${fault_key} rc=${rc}" "${incident_id}"
return "${rc}" return "${rc}"
} }
full_restart_stack() { full_restart_stack() {
local reason="$1" local reason="$1"
local rc local rc
local incident_id=""
incident_id="$(watchdog_launch_incident "${reason}" "blitz-robot.target")"
set_last_action "full-restart" set_last_action "full-restart"
RECOVERY_ACTION_TAKEN=1 RECOVERY_ACTION_TAKEN=1
recovery_state="recovering" recovery_state="recovering"
fault_reason="${reason}" fault_reason="${reason}"
blitz_log "${STEP}" "full-restart-stop-bside" "start" "reason=${reason}" 0 blitz_log "${STEP}" "full-restart-stop-bside" "start" "reason=${reason}" 0
watchdog_append_event "event" "full-restart-start" "${reason}" "recovering" "" "${incident_id}"
systemctl stop "${B_SIDE_SERVICE}" || true systemctl stop "${B_SIDE_SERVICE}" || true
if systemctl restart "${ROS_SERVICE}"; then if systemctl restart "${ROS_SERVICE}"; then
@@ -345,9 +471,11 @@ full_restart_stack() {
else else
rc=$? rc=$?
blitz_log "${STEP}" "full-restart-start-bside" "failure" "reason=${reason}" "${rc}" blitz_log "${STEP}" "full-restart-start-bside" "failure" "reason=${reason}" "${rc}"
watchdog_append_event "event" "full-restart-failure" "${reason}" "recovering" "stage=start-bside rc=${rc}" "${incident_id}"
record_full_restart record_full_restart
return "${rc}" return "${rc}"
fi fi
watchdog_append_event "event" "full-restart-success" "${reason}" "recovering" "" "${incident_id}"
record_full_restart record_full_restart
} }
@@ -659,6 +787,7 @@ wait_for_network_recovery() {
perform_network_recovery() { perform_network_recovery() {
local rc=0 local rc=0
local incident_id=""
if resolve_network_interface && repair_network_routes "${NETWORK_LAST_INTERFACE}"; then if resolve_network_interface && repair_network_routes "${NETWORK_LAST_INTERFACE}"; then
set_last_action "route-repair" set_last_action "route-repair"
@@ -666,12 +795,15 @@ perform_network_recovery() {
NETWORK_COOLDOWN_UNTIL=$(( $(now_epoch_sec) + BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC )) NETWORK_COOLDOWN_UNTIL=$(( $(now_epoch_sec) + BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC ))
NETWORK_FAIL_COUNT=0 NETWORK_FAIL_COUNT=0
blitz_log "${STEP}" "network-recovery" "success" "mode=route-repair interface=${NETWORK_LAST_INTERFACE}" 0 blitz_log "${STEP}" "network-recovery" "success" "mode=route-repair interface=${NETWORK_LAST_INTERFACE}" 0
watchdog_append_event "event" "route-repair-success" "network_or_robot_unreachable" "recovering" "interface=${NETWORK_LAST_INTERFACE}" ""
return 0 return 0
fi fi
incident_id="$(watchdog_launch_incident "network-recovery" "blitz-5g-dial.service")"
set_last_action "network-recovery" set_last_action "network-recovery"
RECOVERY_ACTION_TAKEN=1 RECOVERY_ACTION_TAKEN=1
blitz_log "${STEP}" "network-recovery" "start" "fail_count=${NETWORK_FAIL_COUNT}" 0 blitz_log "${STEP}" "network-recovery" "start" "fail_count=${NETWORK_FAIL_COUNT}" 0
watchdog_append_event "event" "network-recovery-start" "network_or_robot_unreachable" "recovering" "fail_count=${NETWORK_FAIL_COUNT}" "${incident_id}"
systemctl stop "${B_SIDE_SERVICE}" || true systemctl stop "${B_SIDE_SERVICE}" || true
if bash "${BOOT_SCRIPT_DIR}/5g-dial.sh"; then if bash "${BOOT_SCRIPT_DIR}/5g-dial.sh"; then
@@ -679,6 +811,7 @@ perform_network_recovery() {
else else
rc=$? rc=$?
blitz_log "${STEP}" "network-redial" "failure" "fail_count=${NETWORK_FAIL_COUNT} script=${BOOT_SCRIPT_DIR}/5g-dial.sh" "${rc}" blitz_log "${STEP}" "network-redial" "failure" "fail_count=${NETWORK_FAIL_COUNT} script=${BOOT_SCRIPT_DIR}/5g-dial.sh" "${rc}"
watchdog_append_event "event" "network-recovery-failure" "network_or_robot_unreachable" "recovering" "stage=redial rc=${rc}" "${incident_id}"
return "${rc}" return "${rc}"
fi fi
@@ -687,11 +820,13 @@ perform_network_recovery() {
else else
rc=$? rc=$?
blitz_log "${STEP}" "network-recovery" "failure" "fail_count=${NETWORK_FAIL_COUNT} interface=${NETWORK_LAST_INTERFACE:-unresolved}" "${rc}" blitz_log "${STEP}" "network-recovery" "failure" "fail_count=${NETWORK_FAIL_COUNT} interface=${NETWORK_LAST_INTERFACE:-unresolved}" "${rc}"
watchdog_append_event "event" "network-recovery-failure" "network_or_robot_unreachable" "recovering" "stage=postcheck rc=${rc}" "${incident_id}"
return "${rc}" return "${rc}"
fi fi
NETWORK_COOLDOWN_UNTIL=$(( $(now_epoch_sec) + BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC )) NETWORK_COOLDOWN_UNTIL=$(( $(now_epoch_sec) + BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC ))
NETWORK_FAIL_COUNT=0 NETWORK_FAIL_COUNT=0
watchdog_append_event "event" "network-recovery-success" "network_or_robot_unreachable" "recovering" "interface=${NETWORK_LAST_INTERFACE:-unresolved}" "${incident_id}"
if ros_receiver_healthy "${BLITZ_HEALTH_STALE_SEC}"; then if ros_receiver_healthy "${BLITZ_HEALTH_STALE_SEC}"; then
restart_bside_targeted "network" "network-recovered" restart_bside_targeted "network" "network-recovered"
return 0 return 0
@@ -707,11 +842,14 @@ blitz_require_command stat "${STEP}"
blitz_require_command ping "${STEP}" blitz_require_command ping "${STEP}"
blitz_require_command python3 "${STEP}" blitz_require_command python3 "${STEP}"
blitz_prepare_runtime_dir blitz_prepare_runtime_dir
blitz_require_run_context
B_SIDE_STATUS_FILE="${BLITZ_RUNTIME_DIR}/b-side-omnid.status.json" B_SIDE_STATUS_FILE="${BLITZ_RUNTIME_DIR}/b-side-omnid.status.json"
ROS_STATUS_FILE="${BLITZ_RUNTIME_DIR}/ros-receiver.status.json" ROS_STATUS_FILE="${BLITZ_RUNTIME_DIR}/ros-receiver.status.json"
WATCHDOG_STATUS_FILE="${BLITZ_RUNTIME_DIR}/watchdog.status.json" WATCHDOG_STATUS_FILE="${BLITZ_RUNTIME_DIR}/watchdog.status.json"
NETWORK_FAULT_FILE="${BLITZ_RUNTIME_DIR}/fault-injection-network-down" NETWORK_FAULT_FILE="${BLITZ_RUNTIME_DIR}/fault-injection-network-down"
WATCHDOG_EVENT_LOG="${BLITZ_RUN_DIR}/watchdog-events.jsonl"
WATCHDOG_SAMPLE_LOG="${BLITZ_RUN_DIR}/watchdog-samples.jsonl"
while true; do while true; do
fault_reason="none" fault_reason="none"
@@ -735,7 +873,9 @@ while true; do
if (( BACKOFF_UNTIL > now_sec )); then if (( BACKOFF_UNTIL > now_sec )); then
fault_reason="backoff" fault_reason="backoff"
recovery_state="backoff" recovery_state="backoff"
watchdog_record_state_transition "${fault_reason}" "${recovery_state}"
write_watchdog_status "${fault_reason}" "${recovery_state}" 0 0 0 0 "${gps_ok}" "${gps_device_present}" write_watchdog_status "${fault_reason}" "${recovery_state}" 0 0 0 0 "${gps_ok}" "${gps_device_present}"
watchdog_append_sample "sample" "loop" "${fault_reason}" "${recovery_state}" "" "" 0 0 0 0 "${gps_ok}" "${gps_device_present}"
sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}" sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}"
continue continue
fi fi
@@ -824,6 +964,8 @@ while true; do
full_restart_stack "ros-unhealthy" || true full_restart_stack "ros-unhealthy" || true
fi fi
watchdog_record_state_transition "${fault_reason}" "${recovery_state}"
write_watchdog_status "${fault_reason}" "${recovery_state}" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}" "${gps_ok}" "${gps_device_present}" write_watchdog_status "${fault_reason}" "${recovery_state}" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}" "${gps_ok}" "${gps_device_present}"
watchdog_append_sample "sample" "loop" "${fault_reason}" "${recovery_state}" "" "" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}" "${gps_ok}" "${gps_device_present}"
sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}" sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}"
done done

View File

@@ -33,8 +33,10 @@ blitz_load_boot_env() {
return 0 return 0
fi fi
export BLITZ_BOOT_LOADING_ENV="1"
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${DEV_SCRIPT_DIR}/load-env.sh" source "${DEV_SCRIPT_DIR}/load-env.sh"
unset BLITZ_BOOT_LOADING_ENV
for env_file in \ for env_file in \
"${BOOT_SCRIPT_DIR}/robot-boot.env" \ "${BOOT_SCRIPT_DIR}/robot-boot.env" \
@@ -51,8 +53,12 @@ blitz_load_boot_env() {
default_time_server="$(blitz_host_from_addr "${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}" || true)" default_time_server="$(blitz_host_from_addr "${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}" || true)"
export BLITZ_BOOT_DELAY_SEC="${BLITZ_BOOT_DELAY_SEC:-30}" export BLITZ_BOOT_DELAY_SEC="${BLITZ_BOOT_DELAY_SEC:-30}"
export BLITZ_RUN_ROOT="${BLITZ_RUN_ROOT:-/var/log/blitz-robot}"
export BLITZ_LOG_FILE="${BLITZ_LOG_FILE:-/var/log/blitz-robot/startup.log}" export BLITZ_LOG_FILE="${BLITZ_LOG_FILE:-/var/log/blitz-robot/startup.log}"
export BLITZ_RUNTIME_DIR="${BLITZ_RUNTIME_DIR:-/run/blitz-robot}" export BLITZ_RUNTIME_DIR="${BLITZ_RUNTIME_DIR:-/run/blitz-robot}"
export BLITZ_RUN_CONTEXT_FILE="${BLITZ_RUN_CONTEXT_FILE:-${BLITZ_RUNTIME_DIR}/run-context.env}"
export BLITZ_RUN_ID_FILE="${BLITZ_RUN_ID_FILE:-${BLITZ_RUNTIME_DIR}/run-id}"
export BLITZ_CURRENT_RUN_LINK="${BLITZ_CURRENT_RUN_LINK:-${BLITZ_RUN_ROOT}/current}"
export BLITZ_5G_DIAL_DIR="${BLITZ_5G_DIAL_DIR:-${BOOT_SCRIPT_DIR}}" export BLITZ_5G_DIAL_DIR="${BLITZ_5G_DIAL_DIR:-${BOOT_SCRIPT_DIR}}"
export BLITZ_5G_SERIAL_PORT="${BLITZ_5G_SERIAL_PORT:-/dev/ttyUSB7}" export BLITZ_5G_SERIAL_PORT="${BLITZ_5G_SERIAL_PORT:-/dev/ttyUSB7}"
export BLITZ_5G_INTERFACE="${BLITZ_5G_INTERFACE:-}" export BLITZ_5G_INTERFACE="${BLITZ_5G_INTERFACE:-}"
@@ -71,6 +77,16 @@ blitz_load_boot_env() {
export BLITZ_WATCHDOG_INTERVAL_SEC="${BLITZ_WATCHDOG_INTERVAL_SEC:-5}" export BLITZ_WATCHDOG_INTERVAL_SEC="${BLITZ_WATCHDOG_INTERVAL_SEC:-5}"
export BLITZ_HEALTH_STALE_SEC="${BLITZ_HEALTH_STALE_SEC:-15}" export BLITZ_HEALTH_STALE_SEC="${BLITZ_HEALTH_STALE_SEC:-15}"
export BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="${BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC:-15}" export BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="${BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC:-15}"
export BLITZ_KCP_STATS_INTERVAL_MS="${BLITZ_KCP_STATS_INTERVAL_MS:-1000}"
export BLITZ_CONTROL_LATENCY_LOG_ENABLED="${BLITZ_CONTROL_LATENCY_LOG_ENABLED:-1}"
export BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD="${BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD:-100}"
export BLITZ_5G_LINK_LOG_INTERVAL_SEC="${BLITZ_5G_LINK_LOG_INTERVAL_SEC:-5}"
export BLITZ_JSONL_FLUSH_INTERVAL_MS="${BLITZ_JSONL_FLUSH_INTERVAL_MS:-1000}"
export BLITZ_JSONL_FLUSH_BYTES="${BLITZ_JSONL_FLUSH_BYTES:-262144}"
export BLITZ_JSONL_ROTATE_BYTES="${BLITZ_JSONL_ROTATE_BYTES:-134217728}"
export BLITZ_JSONL_ROTATE_FILES="${BLITZ_JSONL_ROTATE_FILES:-8}"
export BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC="${BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC:-5}"
export BLITZ_INCIDENT_TOTAL_TIMEOUT_SEC="${BLITZ_INCIDENT_TOTAL_TIMEOUT_SEC:-30}"
export BLITZ_NETWORK_FAIL_THRESHOLD="${BLITZ_NETWORK_FAIL_THRESHOLD:-3}" export BLITZ_NETWORK_FAIL_THRESHOLD="${BLITZ_NETWORK_FAIL_THRESHOLD:-3}"
export BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="${BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC:-30}" export BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="${BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC:-30}"
export BLITZ_GPS_MONITOR_ENABLED="${BLITZ_GPS_MONITOR_ENABLED:-1}" export BLITZ_GPS_MONITOR_ENABLED="${BLITZ_GPS_MONITOR_ENABLED:-1}"
@@ -409,3 +425,212 @@ blitz_prepare_runtime_dir() {
fi fi
blitz_log "runtime-dir" "prepare" "success" "path=${runtime_dir}" 0 blitz_log "runtime-dir" "prepare" "success" "path=${runtime_dir}" 0
} }
# Create the persistent log tree: <BLITZ_RUN_ROOT>/runs and <BLITZ_RUN_ROOT>/incidents.
# Ownership and mode are only adjusted when running as root, and those
# adjustments are best-effort (errors are silenced and ignored).
blitz_prepare_run_root() {
  local root_path runs_path incidents_path

  blitz_load_boot_env
  root_path="${BLITZ_RUN_ROOT}"
  runs_path="${root_path}/runs"
  incidents_path="${root_path}/incidents"

  mkdir -p "${runs_path}" "${incidents_path}"

  if [[ "${EUID}" -ne 0 ]]; then
    return 0
  fi
  chown -R "root:${BLITZ_ROS_USER}" "${root_path}" 2>/dev/null || true
  chmod 0775 "${root_path}" "${runs_path}" "${incidents_path}" 2>/dev/null || true
}
# Source a run-context env file and export every variable it assigns.
#   $1  optional path; defaults to BLITZ_RUN_CONTEXT_FILE.
# Returns 1 when no path is known or the file is missing, 0 after sourcing.
blitz_load_run_context_env() {
  local env_path="${1:-${BLITZ_RUN_CONTEXT_FILE:-}}"

  [[ -n "${env_path}" ]] || return 1
  [[ -f "${env_path}" ]] || return 1

  # allexport is enabled only around the source so the file's plain
  # KEY=value lines end up exported.
  set -a
  # shellcheck disable=SC1090
  source "${env_path}"
  set +a
  return 0
}
# Print the run id stored in BLITZ_RUN_ID_FILE with CR/LF characters removed.
# Returns 1 when the variable is empty or the file does not exist.
blitz_read_run_id() {
  local id_path="${BLITZ_RUN_ID_FILE:-}"

  if [[ -n "${id_path}" && -f "${id_path}" ]]; then
    tr -d '\r\n' < "${id_path}"
    return
  fi
  return 1
}
# Print the current UTC time in compact form, e.g. 20260414T125241Z.
blitz_utc_compact_timestamp() {
  local stamp_format='+%Y%m%dT%H%M%SZ'
  date -u "${stamp_format}"
}
# Print a new run id: the compact UTC timestamp (one-second resolution).
blitz_new_run_id() {
  printf '%s\n' \
    "$(blitz_utc_compact_timestamp)"
}
# Print an incident id of the form <prefix>-<UTC timestamp>-<shell pid>.
#   $1  optional prefix; defaults to "incident".
blitz_new_incident_id() {
  local id_prefix="${1:-incident}"

  # The instance id already has the <timestamp>-<pid> shape, so reuse it.
  printf '%s-%s\n' "${id_prefix}" "$(blitz_new_instance_id)"
}
# Print an instance id of the form <UTC timestamp>-<shell pid>.
blitz_new_instance_id() {
  local shell_pid="$$"

  printf '%s-%d\n' "$(blitz_utc_compact_timestamp)" "${shell_pid}"
}
# Print the HEAD commit hash of the checkout at OMNISOCKETGO_ROOT.
# Prints nothing and still returns 0 when git cannot resolve HEAD.
blitz_git_commit() {
  if ! git -C "${OMNISOCKETGO_ROOT}" rev-parse HEAD 2>/dev/null; then
    return 0
  fi
}
# Print "1" when the checkout at OMNISOCKETGO_ROOT has uncommitted changes to
# tracked files, otherwise "0".
# The comparison is made against HEAD so that staged-but-uncommitted edits
# count as dirty; a bare `git diff` only compares the working tree with the
# index and would report such a tree as clean.
# Any git failure (for example: not a repository, no commits yet) prints "1".
blitz_git_dirty_flag() {
  if git -C "${OMNISOCKETGO_ROOT}" diff --quiet --ignore-submodules=dirty HEAD -- >/dev/null 2>&1; then
    printf '0\n'
    return 0
  fi
  printf '1\n'
}
# Persist the identifiers of the current run.
#   $1  run id
#   $2  run directory (created if missing)
#   $3  boot id
# Writes, in order: BLITZ_RUN_ID_FILE, BLITZ_RUN_CONTEXT_FILE,
# <run_dir>/run-info.json, and finally repoints BLITZ_CURRENT_RUN_LINK.
# Every file is written to a "<name>.tmp.<pid>" sibling and renamed into
# place, so a concurrent reader never observes a truncated file.
blitz_write_run_context() {
  local run_id="$1"
  local run_dir="$2"
  local boot_id="$3"
  local context_file="${BLITZ_RUN_CONTEXT_FILE}"
  local id_file="${BLITZ_RUN_ID_FILE}"
  local temp_context
  local temp_id
  local temp_info
  local commit_hash
  local dirty_flag
  local started_at

  commit_hash="$(blitz_git_commit)"
  dirty_flag="$(blitz_git_dirty_flag)"
  started_at="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  temp_context="${context_file}.tmp.$$"
  temp_id="${id_file}.tmp.$$"
  temp_info="${run_dir}/run-info.json.tmp.$$"

  mkdir -p "${run_dir}"

  # Same temp+rename pattern as the context file below.
  printf '%s\n' "${run_id}" > "${temp_id}"
  mv -f "${temp_id}" "${id_file}"

  cat > "${temp_context}" <<EOF
BLITZ_RUN_ID=${run_id}
BLITZ_RUN_DIR=${run_dir}
BLITZ_BOOT_ID=${boot_id}
BLITZ_RUN_ROOT=${BLITZ_RUN_ROOT}
EOF
  mv -f "${temp_context}" "${context_file}"

  # run-info.json snapshots the run metadata plus every BLITZ_/OMNI_/
  # ROBOT_RECEIVER_ environment variable visible to this process.
  python3 - "${temp_info}" "${run_id}" "${run_dir}" "${boot_id}" "${started_at}" "${commit_hash}" "${dirty_flag}" "${HOSTNAME:-$(hostname)}" <<'PY'
import json
import os
import sys
path, run_id, run_dir, boot_id, started_at, commit_hash, dirty_flag, hostname = sys.argv[1:9]
payload = {
    "run_id": run_id,
    "run_dir": run_dir,
    "boot_id": boot_id,
    "started_at": started_at,
    "hostname": hostname,
    "git_commit": commit_hash,
    "git_dirty": dirty_flag == "1",
    "env": {
        key: os.environ.get(key, "")
        for key in sorted(os.environ)
        if key.startswith(("BLITZ_", "OMNI_", "ROBOT_RECEIVER_"))
    },
}
with open(path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, ensure_ascii=False, indent=2, sort_keys=True)
PY
  mv -f "${temp_info}" "${run_dir}/run-info.json"
  ln -sfn "${run_dir}" "${BLITZ_CURRENT_RUN_LINK}"
}
# Start a new run: prepare the runtime and log directories, mint a run id,
# export BLITZ_RUN_ID / BLITZ_RUN_DIR / BLITZ_BOOT_ID and persist them.
blitz_init_run_context() {
  local new_run_id kernel_boot_id new_run_dir

  blitz_load_boot_env
  blitz_prepare_runtime_dir
  blitz_prepare_run_root

  new_run_id="$(blitz_new_run_id)"
  # Falls back to a timestamp when the kernel boot id cannot be read.
  kernel_boot_id="$(cat /proc/sys/kernel/random/boot_id 2>/dev/null || blitz_new_run_id)"
  new_run_dir="${BLITZ_RUN_ROOT}/runs/${new_run_id}"

  export BLITZ_RUN_ID="${new_run_id}"
  export BLITZ_RUN_DIR="${new_run_dir}"
  export BLITZ_BOOT_ID="${kernel_boot_id}"

  blitz_write_run_context "${new_run_id}" "${new_run_dir}" "${kernel_boot_id}"
  blitz_log "run-context" "init" "success" "run_id=${new_run_id} run_dir=${new_run_dir}" 0
}
# Load the boot environment and the persisted run context.
# Logs a failure and returns 1 when the run-context file cannot be loaded.
blitz_require_run_context() {
  blitz_load_boot_env
  if ! blitz_load_run_context_env; then
    blitz_log "run-context" "load" "failure" "missing ${BLITZ_RUN_CONTEXT_FILE}" 1
    return 1
  fi
  return 0
}
# Export BLITZ_INSTANCE_ID, generating one only when it is unset or empty.
# The id is assigned before it is exported so that a failure of the generator
# is visible in the return status instead of being masked by `export`, which
# would otherwise export an empty id.
blitz_ensure_instance_id() {
  local generated_id

  if [[ -n "${BLITZ_INSTANCE_ID:-}" ]]; then
    return 0
  fi
  generated_id="$(blitz_new_instance_id)" || return 1
  export BLITZ_INSTANCE_ID="${generated_id}"
}
# Rotate a JSONL file once it has reached a size limit.
#   $1  file path
#   $2  size threshold in bytes (default BLITZ_JSONL_ROTATE_BYTES, else 0)
#   $3  number of rotated copies kept (default BLITZ_JSONL_ROTATE_FILES, else 0)
# Does nothing when the file is missing, when either limit is <= 0, or while
# the file is still below the threshold. Otherwise <path>.<max> is dropped,
# each <path>.<n> moves to <path>.<n+1>, and <path> becomes <path>.1.
blitz_jsonl_rotate_if_needed() {
  local path="$1"
  local max_bytes="${2:-${BLITZ_JSONL_ROTATE_BYTES:-0}}"
  local max_files="${3:-${BLITZ_JSONL_ROTATE_FILES:-0}}"
  local current_size=0
  local slot

  [[ -n "${path}" && -f "${path}" ]] || return 0
  (( max_bytes > 0 && max_files > 0 )) || return 0

  current_size="$(stat -c %s "${path}" 2>/dev/null || echo 0)"
  (( current_size >= max_bytes )) || return 0

  # Walk from the oldest slot down so no copy is overwritten before it moves.
  for (( slot = max_files; slot >= 1; slot-- )); do
    if (( slot == max_files )); then
      rm -f "${path}.${slot}"
    fi
    if [[ -f "${path}.${slot}" ]]; then
      mv -f "${path}.${slot}" "${path}.$(( slot + 1 ))"
    fi
  done
  mv -f "${path}" "${path}.1"
}
# Append one line to a JSONL file, creating its directory and rotating the
# file first when needed.
#   $1  file path
#   $2  line to append (a newline is added)
blitz_jsonl_append_line() {
  local path="$1"
  local line="$2"
  local parent_dir

  parent_dir="$(dirname "${path}")"
  mkdir -p "${parent_dir}"
  blitz_jsonl_rotate_if_needed "${path}"
  printf '%s\n' "${line}" >> "${path}"
}
# Run blitz-incident-capture-launch.sh from BOOT_SCRIPT_DIR with the given
# arguments, discarding its output.
# Returns 1 when the script is missing or exits non-zero, 0 otherwise.
blitz_launch_incident_capture() {
  local capture_launcher="${BOOT_SCRIPT_DIR}/blitz-incident-capture-launch.sh"

  [[ -f "${capture_launcher}" ]] || return 1
  if ! "${capture_launcher}" "$@" >/dev/null 2>&1; then
    return 1
  fi
}

View File

@@ -9,9 +9,11 @@ STEP="disable"
SYSTEMD_DEST_DIR="/etc/systemd/system" SYSTEMD_DEST_DIR="/etc/systemd/system"
UNITS=( UNITS=(
"blitz-watchdog.service" "blitz-watchdog.service"
"blitz-5g-link-logger.service"
"blitz-b-side-omnid.service" "blitz-b-side-omnid.service"
"blitz-ros-receiver.service" "blitz-ros-receiver.service"
"blitz-5g-dial.service" "blitz-5g-dial.service"
"blitz-run-context.service"
"blitz-boot-gate.service" "blitz-boot-gate.service"
"blitz-robot.target" "blitz-robot.target"
) )

View File

@@ -54,9 +54,12 @@ touch "${BLITZ_LOG_FILE}"
chmod 0644 "${BLITZ_LOG_FILE}" chmod 0644 "${BLITZ_LOG_FILE}"
blitz_log "install" "prepare-log-file" "success" "log_file=${BLITZ_LOG_FILE}" 0 blitz_log "install" "prepare-log-file" "success" "log_file=${BLITZ_LOG_FILE}" 0
blitz_prepare_runtime_dir blitz_prepare_runtime_dir
blitz_prepare_run_root
install_unit "blitz-boot-gate.service.in" install_unit "blitz-boot-gate.service.in"
install_unit "blitz-run-context.service.in"
install_unit "blitz-5g-dial.service.in" install_unit "blitz-5g-dial.service.in"
install_unit "blitz-5g-link-logger.service.in"
install_unit "blitz-ros-receiver.service.in" install_unit "blitz-ros-receiver.service.in"
install_unit "blitz-b-side-omnid.service.in" install_unit "blitz-b-side-omnid.service.in"
install_unit "blitz-watchdog.service.in" install_unit "blitz-watchdog.service.in"

View File

@@ -2,6 +2,7 @@
# Override machine-specific values in robot-boot.env.local. # Override machine-specific values in robot-boot.env.local.
BLITZ_BOOT_DELAY_SEC="30" BLITZ_BOOT_DELAY_SEC="30"
BLITZ_RUN_ROOT="/var/log/blitz-robot"
BLITZ_LOG_FILE="/var/log/blitz-robot/startup.log" BLITZ_LOG_FILE="/var/log/blitz-robot/startup.log"
BLITZ_RUNTIME_DIR="/run/blitz-robot" BLITZ_RUNTIME_DIR="/run/blitz-robot"
@@ -25,6 +26,16 @@ BLITZ_ROS_SOCKET_WAIT_SEC="20"
BLITZ_WATCHDOG_INTERVAL_SEC="5" BLITZ_WATCHDOG_INTERVAL_SEC="5"
BLITZ_HEALTH_STALE_SEC="15" BLITZ_HEALTH_STALE_SEC="15"
BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="15" BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="15"
BLITZ_KCP_STATS_INTERVAL_MS="1000"
BLITZ_CONTROL_LATENCY_LOG_ENABLED="1"
BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD="100"
BLITZ_5G_LINK_LOG_INTERVAL_SEC="5"
BLITZ_JSONL_FLUSH_INTERVAL_MS="1000"
BLITZ_JSONL_FLUSH_BYTES="262144"
BLITZ_JSONL_ROTATE_BYTES="134217728"
BLITZ_JSONL_ROTATE_FILES="8"
BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC="5"
BLITZ_INCIDENT_TOTAL_TIMEOUT_SEC="30"
BLITZ_NETWORK_FAIL_THRESHOLD="3" BLITZ_NETWORK_FAIL_THRESHOLD="3"
BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="30" BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="30"
BLITZ_GPS_MONITOR_ENABLED="1" BLITZ_GPS_MONITOR_ENABLED="1"

View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Service entry point for the 5G link logger: loads the boot environment and
# the current run context, derives a per-instance log path, then replaces
# itself with blitz-5g-link-logger.sh.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/common.sh"

STEP="5g-link-logger-service"
blitz_load_boot_env
blitz_require_run_context

export OMNI_BOOT_MODE="1"

# An instance id or log path supplied by the caller wins; otherwise both are
# derived here.
blitz_ensure_instance_id
export BLITZ_INSTANCE_ID
: "${BLITZ_5G_LINK_LOG_PATH:=${BLITZ_RUN_DIR}/b-5g-link-quality.${BLITZ_INSTANCE_ID}.jsonl}"
export BLITZ_5G_LINK_LOG_PATH

blitz_log "${STEP}" "start" "start" "exec bash ${OMNISOCKETGO_ROOT}/scripts/boot/blitz-5g-link-logger.sh" 0
exec bash "${OMNISOCKETGO_ROOT}/scripts/boot/blitz-5g-link-logger.sh"

View File

@@ -8,6 +8,7 @@ source "${SCRIPT_DIR}/common.sh"
STEP="b-side-omnid" STEP="b-side-omnid"
blitz_load_boot_env blitz_load_boot_env
blitz_require_run_context
blitz_require_executable "${OMNISOCKETGO_ROOT}/bin/b_side_omnid" "${STEP}" blitz_require_executable "${OMNISOCKETGO_ROOT}/bin/b_side_omnid" "${STEP}"

View File

@@ -8,6 +8,7 @@ source "${SCRIPT_DIR}/common.sh"
STEP="ros-receiver" STEP="ros-receiver"
blitz_load_boot_env blitz_load_boot_env
blitz_require_run_context
blitz_require_file "/opt/ros/${ROS_DISTRO}/setup.bash" "${STEP}" blitz_require_file "/opt/ros/${ROS_DISTRO}/setup.bash" "${STEP}"
blitz_require_file "${ROS_CONTROL_PY_DIR}/install/setup.bash" "${STEP}" blitz_require_file "${ROS_CONTROL_PY_DIR}/install/setup.bash" "${STEP}"

View File

@@ -1,7 +1,8 @@
[Unit] [Unit]
Description=Blitz robot 5G dial Description=Blitz robot 5G dial
After=blitz-boot-gate.service PartOf=blitz-robot.target
Requires=blitz-boot-gate.service After=blitz-run-context.service
Requires=blitz-run-context.service
[Service] [Service]
Type=oneshot Type=oneshot

View File

@@ -0,0 +1,18 @@
# Long-running unit that runs start-5g-link-logger-service.sh.
[Unit]
Description=Blitz robot 5G link logger
PartOf=blitz-robot.target
# Ordered after the run-context and 5G dial units. Wants= (rather than
# Requires=) pulls them in without making their failure fatal to this unit;
# the start script itself fails when the run context cannot be loaded.
After=blitz-run-context.service blitz-5g-dial.service
Wants=blitz-run-context.service blitz-5g-dial.service
[Service]
Type=simple
# The leading "-" makes a missing file non-fatal.
# NOTE(review): this path is fixed while BLITZ_RUNTIME_DIR can be overridden
# in robot-boot.env; confirm the two stay in sync.
EnvironmentFile=-/run/blitz-robot/run-context.env
ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-5g-link-logger-service.sh
# Restarted unconditionally, 5 seconds after each exit.
Restart=always
RestartSec=5
# stdout and stderr are both appended to the shared startup log.
StandardOutput=append:@BLITZ_LOG_FILE@
StandardError=append:@BLITZ_LOG_FILE@
[Install]
WantedBy=blitz-robot.target

View File

@@ -1,12 +1,15 @@
[Unit] [Unit]
Description=Blitz robot b-side omnid Description=Blitz robot b-side omnid
After=blitz-5g-dial.service blitz-ros-receiver.service PartOf=blitz-robot.target
Wants=blitz-5g-dial.service blitz-ros-receiver.service After=blitz-run-context.service blitz-5g-dial.service blitz-ros-receiver.service
Wants=blitz-run-context.service blitz-5g-dial.service blitz-ros-receiver.service
[Service] [Service]
Type=simple Type=simple
EnvironmentFile=-/run/blitz-robot/run-context.env
ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-b-side-omnid-service.sh ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-b-side-omnid-service.sh
ExecStopPost=/bin/bash -lc 'if [[ "${SERVICE_RESULT:-success}" != "success" ]]; then exec "@OMNISOCKETGO_ROOT@/scripts/boot/blitz-incident-capture-launch.sh" --source exec-stop-post --unit "%n" --result "${SERVICE_RESULT:-}" --exit-status "${EXIT_STATUS:-}" --reason b-side-service-exit; fi'
Restart=always Restart=always
RestartSec=2 RestartSec=2
StandardOutput=append:@BLITZ_LOG_FILE@ StandardOutput=append:@BLITZ_LOG_FILE@

View File

@@ -1,5 +1,6 @@
[Unit] [Unit]
Description=Blitz robot boot gate Description=Blitz robot boot gate
PartOf=blitz-robot.target
After=multi-user.target network-online.target After=multi-user.target network-online.target
Wants=network-online.target Wants=network-online.target

View File

@@ -1,7 +1,9 @@
[Unit] [Unit]
Description=Blitz robot boot chain Description=Blitz robot boot chain
Wants=blitz-boot-gate.service Wants=blitz-boot-gate.service
Wants=blitz-run-context.service
Wants=blitz-5g-dial.service Wants=blitz-5g-dial.service
Wants=blitz-5g-link-logger.service
Wants=blitz-ros-receiver.service Wants=blitz-ros-receiver.service
Wants=blitz-b-side-omnid.service Wants=blitz-b-side-omnid.service
Wants=blitz-watchdog.service Wants=blitz-watchdog.service

View File

@@ -1,15 +1,18 @@
[Unit] [Unit]
Description=Blitz robot ROS receiver Description=Blitz robot ROS receiver
After=blitz-5g-dial.service PartOf=blitz-robot.target
Wants=blitz-5g-dial.service After=blitz-run-context.service blitz-5g-dial.service
Wants=blitz-run-context.service blitz-5g-dial.service
[Service] [Service]
Type=simple Type=simple
User=@BLITZ_ROS_USER@ User=@BLITZ_ROS_USER@
PermissionsStartOnly=true PermissionsStartOnly=true
EnvironmentFile=-/run/blitz-robot/run-context.env
ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-ros-receiver-service.sh ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-ros-receiver-service.sh
ExecStartPost=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/wait-for-unix-socket.sh --step ros-receiver ExecStartPost=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/wait-for-unix-socket.sh --step ros-receiver
ExecStopPost=/bin/bash -lc 'if [[ "${SERVICE_RESULT:-success}" != "success" ]]; then exec "@OMNISOCKETGO_ROOT@/scripts/boot/blitz-incident-capture-launch.sh" --source exec-stop-post --unit "%n" --result "${SERVICE_RESULT:-}" --exit-status "${EXIT_STATUS:-}" --reason ros-service-exit; fi'
Restart=always Restart=always
RestartSec=2 RestartSec=2
StandardOutput=append:@BLITZ_LOG_FILE@ StandardOutput=append:@BLITZ_LOG_FILE@

View File

@@ -0,0 +1,15 @@
# One-shot unit that runs blitz-run-context.sh after the boot gate.
[Unit]
Description=Blitz robot run context
PartOf=blitz-robot.target
# Hard dependency: started only after, and together with, the boot gate.
After=blitz-boot-gate.service
Requires=blitz-boot-gate.service
[Service]
Type=oneshot
# Keeps the unit "active" after the script exits, so units ordered After= it
# see it as started and it is not re-run on every dependency check.
RemainAfterExit=yes
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/blitz-run-context.sh
# stdout and stderr are both appended to the shared startup log.
StandardOutput=append:@BLITZ_LOG_FILE@
StandardError=append:@BLITZ_LOG_FILE@
[Install]
WantedBy=blitz-robot.target

View File

@@ -1,10 +1,12 @@
[Unit] [Unit]
Description=Blitz robot health watchdog Description=Blitz robot health watchdog
After=blitz-b-side-omnid.service blitz-ros-receiver.service PartOf=blitz-robot.target
Wants=blitz-b-side-omnid.service blitz-ros-receiver.service After=blitz-run-context.service blitz-b-side-omnid.service blitz-ros-receiver.service
Wants=blitz-run-context.service blitz-b-side-omnid.service blitz-ros-receiver.service
[Service] [Service]
Type=simple Type=simple
EnvironmentFile=-/run/blitz-robot/run-context.env
ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh ExecStartPre=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/prepare-runtime-dir.sh
ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/blitz-watchdog.sh ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/blitz-watchdog.sh
Restart=always Restart=always

View File

@@ -134,3 +134,162 @@ export OMNI_CONTROL_SERVER_ADDR="${OMNI_CONTROL_SERVER_ADDR:-${ROBOT_SIDE_OMNISO
export OMNI_CONTROL_RELAY_VIA="${OMNI_CONTROL_RELAY_VIA:-${ROBOT_SIDE_OMNISOCKET_RELAY_VIA:-}}" export OMNI_CONTROL_RELAY_VIA="${OMNI_CONTROL_RELAY_VIA:-${ROBOT_SIDE_OMNISOCKET_RELAY_VIA:-}}"
export OMNI_CONTROL_UNIX_SOCKET_PATH="${OMNI_CONTROL_UNIX_SOCKET_PATH:-${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}}" export OMNI_CONTROL_UNIX_SOCKET_PATH="${OMNI_CONTROL_UNIX_SOCKET_PATH:-${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}}"
export B_SIDE_OMNID_USE_SUDO="${B_SIDE_OMNID_USE_SUDO:-1}" export B_SIDE_OMNID_USE_SUDO="${B_SIDE_OMNID_USE_SUDO:-1}"
# Run-context locations. Values already present in the environment win; the
# defaults place everything under ${OMNISOCKETGO_ROOT}/logs.
# Order matters: the later defaults are built from BLITZ_RUNTIME_DIR and
# BLITZ_RUN_ROOT.
export BLITZ_RUNTIME_DIR="${BLITZ_RUNTIME_DIR:-${OMNISOCKETGO_ROOT}/logs/runtime}"
export BLITZ_RUN_ROOT="${BLITZ_RUN_ROOT:-${OMNISOCKETGO_ROOT}/logs}"
export BLITZ_RUN_CONTEXT_FILE="${BLITZ_RUN_CONTEXT_FILE:-${BLITZ_RUNTIME_DIR}/run-context.env}"
export BLITZ_RUN_ID_FILE="${BLITZ_RUN_ID_FILE:-${BLITZ_RUNTIME_DIR}/run-id}"
export BLITZ_CURRENT_RUN_LINK="${BLITZ_CURRENT_RUN_LINK:-${BLITZ_RUN_ROOT}/current}"
# 5G modem / routing defaults (empty means "not configured").
export BLITZ_5G_INTERFACE="${BLITZ_5G_INTERFACE:-}"
export BLITZ_5G_MODEM_SUBNET="${BLITZ_5G_MODEM_SUBNET:-192.168.224.0/22}"
export BLITZ_5G_GATEWAY="${BLITZ_5G_GATEWAY:-192.168.225.1}"
export BLITZ_5G_ROUTE_TARGETS="${BLITZ_5G_ROUTE_TARGETS:-106.55.173.235}"
export BLITZ_5G_INFO_JSON="${BLITZ_5G_INFO_JSON:-${OMNISOCKETGO_ROOT}/scripts/boot/modem_network_info.json}"
export BLITZ_TIME_SERVER_IP="${BLITZ_TIME_SERVER_IP:-}"
# Logging cadence and JSONL flush/rotation limits; the unit is in each
# variable's suffix (_MS, _SEC, _BYTES, _FILES).
export BLITZ_KCP_STATS_INTERVAL_MS="${BLITZ_KCP_STATS_INTERVAL_MS:-1000}"
export BLITZ_CONTROL_LATENCY_LOG_ENABLED="${BLITZ_CONTROL_LATENCY_LOG_ENABLED:-1}"
export BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD="${BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD:-100}"
export BLITZ_5G_LINK_LOG_INTERVAL_SEC="${BLITZ_5G_LINK_LOG_INTERVAL_SEC:-5}"
export BLITZ_JSONL_FLUSH_INTERVAL_MS="${BLITZ_JSONL_FLUSH_INTERVAL_MS:-1000}"
export BLITZ_JSONL_FLUSH_BYTES="${BLITZ_JSONL_FLUSH_BYTES:-262144}"
export BLITZ_JSONL_ROTATE_BYTES="${BLITZ_JSONL_ROTATE_BYTES:-134217728}"
export BLITZ_JSONL_ROTATE_FILES="${BLITZ_JSONL_ROTATE_FILES:-8}"
# Print the current UTC time in compact form, e.g. 20260414T125241Z.
blitz_dev_utc_compact_timestamp() {
  local stamp_format='+%Y%m%dT%H%M%SZ'
  date -u "${stamp_format}"
}
# Print the HEAD commit hash of the checkout at OMNISOCKETGO_ROOT.
# Prints nothing and still returns 0 when git cannot resolve HEAD.
blitz_dev_git_commit() {
  if ! git -C "${OMNISOCKETGO_ROOT}" rev-parse HEAD 2>/dev/null; then
    return 0
  fi
}
# Print "1" when the checkout at OMNISOCKETGO_ROOT has uncommitted changes to
# tracked files, otherwise "0".
# The comparison is made against HEAD so that staged-but-uncommitted edits
# count as dirty; a bare `git diff` only compares the working tree with the
# index and would report such a tree as clean.
# Any git failure (for example: not a repository, no commits yet) prints "1".
blitz_dev_git_dirty_flag() {
  if git -C "${OMNISOCKETGO_ROOT}" diff --quiet --ignore-submodules=dirty HEAD -- >/dev/null 2>&1; then
    printf '0\n'
    return 0
  fi
  printf '1\n'
}
# Create the runtime directory and the runs/ and incidents/ log directories.
blitz_dev_prepare_dirs() {
  local -a required_dirs=(
    "${BLITZ_RUNTIME_DIR}"
    "${BLITZ_RUN_ROOT}/runs"
    "${BLITZ_RUN_ROOT}/incidents"
  )

  mkdir -p "${required_dirs[@]}"
}
# Write <run_dir>/run-info.json describing the run.
#   $1  run directory
#   $2  run id
#   $3  boot id
# The JSON is produced by an embedded Python script into a temp file and then
# renamed into place. It records the run metadata, git commit/dirty state,
# hostname and all BLITZ_/OMNI_/ROBOT_RECEIVER_ environment variables.
blitz_dev_write_run_info() {
  local run_dir="$1"
  local run_id="$2"
  local boot_id="$3"
  local staging_path="${run_dir}/run-info.json.tmp.$$"
  local start_stamp head_commit is_dirty

  start_stamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  head_commit="$(blitz_dev_git_commit)"
  is_dirty="$(blitz_dev_git_dirty_flag)"

  python3 - "${staging_path}" "${run_id}" "${run_dir}" "${boot_id}" "${start_stamp}" "${head_commit}" "${is_dirty}" "${HOSTNAME:-$(hostname)}" <<'PY'
import json
import os
import sys
path, run_id, run_dir, boot_id, started_at, commit_hash, dirty_flag, hostname = sys.argv[1:9]
payload = {
    "run_id": run_id,
    "run_dir": run_dir,
    "boot_id": boot_id,
    "started_at": started_at,
    "hostname": hostname,
    "git_commit": commit_hash,
    "git_dirty": dirty_flag == "1",
    "env": {
        key: os.environ.get(key, "")
        for key in sorted(os.environ)
        if key.startswith(("BLITZ_", "OMNI_", "ROBOT_RECEIVER_"))
    },
}
with open(path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, ensure_ascii=False, indent=2, sort_keys=True)
PY
  mv -f "${staging_path}" "${run_dir}/run-info.json"
}
# Create a new dev run and make it current.
#   $1  optional run id; defaults to the compact UTC timestamp.
# Exports BLITZ_RUN_ID / BLITZ_RUN_DIR / BLITZ_BOOT_ID, writes the run-id and
# run-context files, repoints BLITZ_CURRENT_RUN_LINK and writes run-info.json.
# Both the run-id and run-context files are written to a temp sibling and
# renamed into place, so a concurrent reader never sees a truncated file.
blitz_dev_init_run_context() {
  local run_id="${1:-$(blitz_dev_utc_compact_timestamp)}"
  local boot_id="dev-$(blitz_dev_utc_compact_timestamp)"
  local run_dir="${BLITZ_RUN_ROOT}/runs/${run_id}"
  local tmp_context="${BLITZ_RUN_CONTEXT_FILE}.tmp.$$"
  local tmp_id="${BLITZ_RUN_ID_FILE}.tmp.$$"

  blitz_dev_prepare_dirs
  mkdir -p "${run_dir}"

  export BLITZ_RUN_ID="${run_id}"
  export BLITZ_RUN_DIR="${run_dir}"
  export BLITZ_BOOT_ID="${boot_id}"

  printf '%s\n' "${run_id}" > "${tmp_id}"
  mv -f "${tmp_id}" "${BLITZ_RUN_ID_FILE}"

  cat > "${tmp_context}" <<EOF
BLITZ_RUN_ID=${run_id}
BLITZ_RUN_DIR=${run_dir}
BLITZ_BOOT_ID=${boot_id}
BLITZ_RUN_ROOT=${BLITZ_RUN_ROOT}
EOF
  mv -f "${tmp_context}" "${BLITZ_RUN_CONTEXT_FILE}"

  ln -sfn "${run_dir}" "${BLITZ_CURRENT_RUN_LINK}"
  blitz_dev_write_run_info "${run_dir}" "${run_id}" "${boot_id}"
}
# Source BLITZ_RUN_CONTEXT_FILE and export everything it assigns.
# Returns 1 when the file does not exist.
blitz_dev_load_run_context() {
  [[ -f "${BLITZ_RUN_CONTEXT_FILE}" ]] || return 1

  # allexport is enabled only around the source so the file's plain
  # KEY=value lines end up exported.
  set -a
  # shellcheck disable=SC1090
  source "${BLITZ_RUN_CONTEXT_FILE}"
  set +a
}
# Reuse the persisted run context when one exists; otherwise create a new run.
blitz_dev_ensure_run_context() {
  blitz_dev_load_run_context || blitz_dev_init_run_context
}
# Discard the persisted run context and run id, then start a new run.
blitz_dev_reset_run_context() {
  local -a stale_files=("${BLITZ_RUN_CONTEXT_FILE}" "${BLITZ_RUN_ID_FILE}")

  rm -f "${stale_files[@]}"
  blitz_dev_init_run_context
}
# Export BLITZ_INSTANCE_ID as <UTC timestamp>-<shell pid> unless it is
# already set to a non-empty value.
blitz_dev_init_instance_context() {
  [[ -z "${BLITZ_INSTANCE_ID:-}" ]] || return 0
  export BLITZ_INSTANCE_ID="$(blitz_dev_utc_compact_timestamp)-$$"
}
# Print the per-instance JSONL path for a component:
#   <BLITZ_RUN_DIR>/<stem>.<BLITZ_INSTANCE_ID>.jsonl
#   $1  component stem, e.g. "b-control-latency"
blitz_dev_component_log_path() {
  local component_stem="$1"

  printf '%s\n' "${BLITZ_RUN_DIR}/${component_stem}.${BLITZ_INSTANCE_ID}.jsonl"
}
# Ensure an instance id exists and, when OMNI_NETWORK_SUMMARY_LOG_PATH still
# equals ${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl, redirect it to the
# per-instance file inside the current run directory. Any other value is
# left untouched.
blitz_dev_prepare_backend_logging_env() {
  local shared_log_path="${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl"

  blitz_dev_init_instance_context
  if [[ "${OMNI_NETWORK_SUMMARY_LOG_PATH}" != "${shared_log_path}" ]]; then
    return 0
  fi
  export OMNI_NETWORK_SUMMARY_LOG_PATH
  OMNI_NETWORK_SUMMARY_LOG_PATH="$(blitz_dev_component_log_path "a-network-summary")"
}
# Ensure an instance id exists and export the B-side log paths, keeping any
# value the caller already provided.
blitz_dev_prepare_bside_logging_env() {
  blitz_dev_init_instance_context

  : "${BLITZ_KCP_STATS_LOG_PATH:=$(blitz_dev_component_log_path "b-kcp-session-stats")}"
  export BLITZ_KCP_STATS_LOG_PATH
  : "${BLITZ_CONTROL_LATENCY_LOG_PATH:=$(blitz_dev_component_log_path "b-control-latency")}"
  export BLITZ_CONTROL_LATENCY_LOG_PATH
}
# Ensure an instance id exists and export BLITZ_5G_LINK_LOG_PATH, keeping any
# value the caller already provided.
blitz_dev_prepare_5g_logging_env() {
  blitz_dev_init_instance_context

  : "${BLITZ_5G_LINK_LOG_PATH:=$(blitz_dev_component_log_path "b-5g-link-quality")}"
  export BLITZ_5G_LINK_LOG_PATH
}
# Run-context bootstrap performed whenever this file is sourced.
# When any of the opt-out flags is "1" (explicit skip, boot env loading, or
# boot mode) an existing context is loaded but never created; otherwise a
# context is loaded or, if absent, created.
if [[ "${BLITZ_SKIP_DEV_RUN_CONTEXT_INIT:-0}" == "1" || "${BLITZ_BOOT_LOADING_ENV:-0}" == "1" || "${OMNI_BOOT_MODE:-0}" == "1" ]]; then
  if [[ -f "${BLITZ_RUN_CONTEXT_FILE}" ]]; then
    blitz_dev_load_run_context || true
  fi
else
  blitz_dev_ensure_run_context
fi

View File

@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Start a fresh dev run and print its id and directory.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Stop load-env.sh from creating a context on its own; a new one is always
# created below.
export BLITZ_SKIP_DEV_RUN_CONTEXT_INIT="1"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/load-env.sh"

blitz_dev_reset_run_context
printf 'run_id=%s\n' "${BLITZ_RUN_ID}"
printf 'run_dir=%s\n' "${BLITZ_RUN_DIR}"

View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Dev launcher for the 5G link logger: loads the dev environment, resolves the
# per-instance log path and replaces itself with the logger script.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/load-env.sh"

blitz_dev_prepare_5g_logging_env

LINK_LOGGER_SCRIPT="${OMNISOCKETGO_ROOT}/scripts/boot/blitz-5g-link-logger.sh"
exec bash "${LINK_LOGGER_SCRIPT}"

View File

@@ -4,6 +4,7 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${SCRIPT_DIR}/load-env.sh" source "${SCRIPT_DIR}/load-env.sh"
blitz_dev_prepare_bside_logging_env
cd "${OMNISOCKETGO_ROOT}" cd "${OMNISOCKETGO_ROOT}"
@@ -14,6 +15,24 @@ export OMNI_VIDEO_RELAY_VIA="${OMNI_VIDEO_RELAY_VIA}"
export OMNI_CONTROL_SERVER_ADDR="${OMNI_CONTROL_SERVER_ADDR}" export OMNI_CONTROL_SERVER_ADDR="${OMNI_CONTROL_SERVER_ADDR}"
export OMNI_CONTROL_RELAY_VIA="${OMNI_CONTROL_RELAY_VIA}" export OMNI_CONTROL_RELAY_VIA="${OMNI_CONTROL_RELAY_VIA}"
# PID of the background 5G link logger; empty while none has been started.
logger_pid=""

# Stop the background logger (if any) and reap it. Errors are ignored so the
# handler is safe to run more than once.
cleanup() {
  [[ -n "${logger_pid}" ]] || return 0
  kill "${logger_pid}" 2>/dev/null || true
  wait "${logger_pid}" 2>/dev/null || true
}
# Start the 5G link logger in the background unless running in boot mode
# (OMNI_BOOT_MODE=1), and remember its PID in logger_pid.
# The log path is resolved here, before spawning, so that the child inherits
# it and the status line below reports the real destination. Previously the
# path was only computed inside the child and the message printed "unset".
start_5g_link_logger_if_needed() {
  if [[ "${OMNI_BOOT_MODE:-0}" == "1" ]]; then
    return 0
  fi
  blitz_dev_prepare_5g_logging_env
  bash "${SCRIPT_DIR}/start-5g-link-logger.sh" &
  logger_pid=$!
  echo "[start-b-side-omnid] 5G link logger -> ${BLITZ_5G_LINK_LOG_PATH:-unset}" >&2
}
if [[ ! -x "./bin/b_side_omnid" ]]; then if [[ ! -x "./bin/b_side_omnid" ]]; then
if [[ "${OMNI_BOOT_MODE:-0}" == "1" ]]; then if [[ "${OMNI_BOOT_MODE:-0}" == "1" ]]; then
echo "Missing ./bin/b_side_omnid in boot mode; build it before enabling the autostart service." >&2 echo "Missing ./bin/b_side_omnid in boot mode; build it before enabling the autostart service." >&2
@@ -23,12 +42,14 @@ if [[ ! -x "./bin/b_side_omnid" ]]; then
fi fi
launch_b_side_omnid() { launch_b_side_omnid() {
trap cleanup EXIT INT TERM
start_5g_link_logger_if_needed
bash "${SCRIPT_DIR}/apply-camera-controls.sh" bash "${SCRIPT_DIR}/apply-camera-controls.sh"
exec ./bin/b_side_omnid ./bin/b_side_omnid
} }
if [[ "${B_SIDE_OMNID_USE_SUDO}" == "1" && "${EUID}" -ne 0 ]]; then if [[ "${B_SIDE_OMNID_USE_SUDO}" == "1" && "${EUID}" -ne 0 ]]; then
exec sudo -E bash -lc 'cd "$1" && bash "$2" && exec "$3"' _ "${OMNISOCKETGO_ROOT}" "${SCRIPT_DIR}/apply-camera-controls.sh" "./bin/b_side_omnid" exec sudo -E bash -lc 'cd "$1" && export B_SIDE_OMNID_USE_SUDO=0 && exec bash "$2"' _ "${OMNISOCKETGO_ROOT}" "${SCRIPT_DIR}/start-b-side-omnid.sh"
fi fi
launch_b_side_omnid launch_b_side_omnid

View File

@@ -5,6 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${SCRIPT_DIR}/load-env.sh" source "${SCRIPT_DIR}/load-env.sh"
require_robot_command_center_root require_robot_command_center_root
blitz_dev_prepare_backend_logging_env
if [[ ! -d "${PYTHON_VENV_PATH}" ]]; then if [[ ! -d "${PYTHON_VENV_PATH}" ]]; then
"${PYTHON3_BIN}" -m venv "${PYTHON_VENV_PATH}" "${PYTHON3_BIN}" -m venv "${PYTHON_VENV_PATH}"

View File

@@ -18,7 +18,7 @@ kcp_packet_debug_logger_t *kcp_packet_debug_open_jsonl(const char *path) {
fclose(file); fclose(file);
return NULL; return NULL;
} }
omni_file_logger_init(&logger->file_logger, file); omni_file_logger_init_path(&logger->file_logger, file, path, 0);
logger->enabled = 1; logger->enabled = 1;
return logger; return logger;
} }

View File

@@ -73,7 +73,7 @@ kcp_session_stats_logger_t *kcp_session_stats_open_jsonl(const char *path) {
fclose(file); fclose(file);
return NULL; return NULL;
} }
omni_file_logger_init(&logger->file_logger, file); omni_file_logger_init_path(&logger->file_logger, file, path, 0);
logger->enabled = 1; logger->enabled = 1;
return logger; return logger;
} }

View File

@@ -32,7 +32,7 @@ latency_logger_t *latencylog_open_jsonl(const char *path) {
fclose(file); fclose(file);
return NULL; return NULL;
} }
omni_file_logger_init(&logger->file_logger, file); omni_file_logger_init_path(&logger->file_logger, file, path, 0);
logger->enabled = 1; logger->enabled = 1;
return logger; return logger;
} }

View File

@@ -544,9 +544,217 @@ const char *omni_path_base_name(const char *path) {
return slash == NULL ? path : slash + 1; return slash == NULL ? path : slash + 1;
} }
/*
 * Return the CLOCK_MONOTONIC reading in milliseconds.
 *
 * Returns 0 when clock_gettime() fails; the timespec is zero-initialised so
 * no indeterminate value is ever read.
 */
static uint64_t omni_now_monotonic_ms64(void) {
    struct timespec ts = {0};

    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        return 0U;
    }
    return (uint64_t) ts.tv_sec * 1000ULL + (uint64_t) (ts.tv_nsec / 1000000L);
}
/*
 * Read a strictly positive decimal integer from environment variable `name`.
 *
 * Returns `default_value` when the variable is unset or empty, is not a
 * complete decimal number, is <= 0, overflows `long` (ERANGE), or does not
 * fit in an `int`. Otherwise returns the parsed value.
 */
static int omni_positive_int_env(const char *name, int default_value) {
    const char *raw = getenv(name);
    char *endptr = NULL;
    long parsed;

    if (raw == NULL || raw[0] == '\0') {
        return default_value;
    }
    errno = 0; /* strtol only reports overflow through errno */
    parsed = strtol(raw, &endptr, 10);
    if (endptr == raw || *endptr != '\0' || errno == ERANGE || parsed <= 0) {
        return default_value;
    }
    /* (~0U >> 1) is INT_MAX; reject values the int cast would truncate. */
    if (parsed > (long) (~0U >> 1)) {
        return default_value;
    }
    return (int) parsed;
}
/*
 * Read a strictly positive decimal size from environment variable `name`.
 *
 * Returns `default_value` when the variable is unset or empty, contains a
 * minus sign, is not a complete decimal number, is zero, overflows
 * `unsigned long long` (ERANGE), or does not fit in `size_t`.
 * Otherwise returns the parsed value.
 */
static size_t omni_positive_size_env(const char *name, size_t default_value) {
    const char *raw = getenv(name);
    char *endptr = NULL;
    unsigned long long parsed;

    if (raw == NULL || raw[0] == '\0') {
        return default_value;
    }
    /* strtoull accepts a leading '-' and negates modulo 2^N, so "-1" would
     * otherwise become the largest representable value. */
    if (strchr(raw, '-') != NULL) {
        return default_value;
    }
    errno = 0; /* strtoull only reports overflow through errno */
    parsed = strtoull(raw, &endptr, 10);
    if (endptr == raw || *endptr != '\0' || errno == ERANGE || parsed == 0ULL) {
        return default_value;
    }
    /* Reject values that would be truncated by the size_t conversion. */
    if ((unsigned long long) (size_t) parsed != parsed) {
        return default_value;
    }
    return (size_t) parsed;
}
/*
 * Flushes the logger's stdio stream and resets its flush bookkeeping.
 *
 * On success the buffered byte counter is cleared and `now_ms` is recorded as
 * the time of the last flush. Returns 0 on success; returns -1 with errno set
 * to EINVAL when there is no logger or no open stream, or -1 when fflush()
 * itself fails.
 */
static int omni_file_logger_flush_locked(omni_file_logger_t *logger, uint64_t now_ms) {
    int has_stream = (logger != NULL) && (logger->file != NULL);

    if (!has_stream) {
        errno = EINVAL;
        return -1;
    }
    if (fflush(logger->file) == 0) {
        logger->last_flush_monotonic_ms = now_ms;
        logger->buffered_bytes = 0U;
        return 0;
    }
    return -1;
}
/*
 * Writes "<path>.<suffix>" into `buffer`.
 *
 * Returns 0 on success. Returns -1 with errno = EINVAL when `buffer` is NULL,
 * `buffer_len` is 0, or `path` is NULL or empty. Returns -1 with
 * errno = ENAMETOOLONG when `path` plus 16 bytes of headroom does not fit in
 * `buffer_len`, or when the formatted suffix would be truncated.
 */
static int omni_build_rotated_path(char *buffer, size_t buffer_len, const char *path, int suffix) {
    size_t stem_len;
    size_t room;
    int tail_len;

    if (buffer == NULL || buffer_len == 0U || path == NULL || path[0] == '\0') {
        errno = EINVAL;
        return -1;
    }

    stem_len = strlen(path);
    /* Reserve headroom for the ".<int>" tail and the terminator up front. */
    if (stem_len + 16U >= buffer_len) {
        errno = ENAMETOOLONG;
        return -1;
    }

    room = buffer_len - stem_len;
    memcpy(buffer, path, stem_len);
    tail_len = snprintf(&buffer[stem_len], room, ".%d", suffix);
    if (tail_len >= 0 && (size_t) tail_len < room) {
        return 0;
    }
    errno = ENAMETOOLONG;
    return -1;
}
/*
 * Opens logger->path in binary append mode and installs it as the logger's
 * stream, then resets the size and flush bookkeeping.
 *
 * current_bytes is taken from stat() on the path, or 0 if stat() fails.
 * Returns 0 on success; -1 with errno = EINVAL when there is no logger or no
 * stored path; -1 when fopen() fails (logger state is then left untouched).
 */
static int omni_file_logger_reopen_append_locked(omni_file_logger_t *logger) {
    struct stat info;
    FILE *reopened;

    if (logger == NULL || logger->path[0] == '\0') {
        errno = EINVAL;
        return -1;
    }

    reopened = fopen(logger->path, "ab");
    if (reopened == NULL) {
        return -1;
    }

    logger->file = reopened;
    logger->current_bytes = (stat(logger->path, &info) == 0) ? (size_t) info.st_size : 0U;
    logger->buffered_bytes = 0U;
    logger->last_flush_monotonic_ms = omni_now_monotonic_ms64();
    return 0;
}
/*
 * Tries to get the logger writing again after a rotation attempt.
 *
 * First tries to reopen logger->path in append mode. If that fails and
 * `rotated_current_path` is non-empty, renames that file back to logger->path
 * and tries the reopen once more.
 *
 * Returns 0 once a reopen succeeds. Otherwise returns -1 with errno set to
 * the error from the first failed reopen.
 */
static int omni_file_logger_recover_after_rotate_locked(omni_file_logger_t *logger, const char *rotated_current_path) {
    int first_errno;
    int can_roll_back;

    if (omni_file_logger_reopen_append_locked(logger) == 0) {
        return 0;
    }
    first_errno = errno;

    can_roll_back = (rotated_current_path != NULL) && (rotated_current_path[0] != '\0');
    if (can_roll_back &&
        rename(rotated_current_path, logger->path) == 0 &&
        omni_file_logger_reopen_append_locked(logger) == 0) {
        return 0;
    }

    /* Report the original reopen failure, not the rollback's. */
    errno = first_errno;
    return -1;
}
/*
 * Rotates the logger's file set: path.(N) -> path.(N+1), path -> path.1,
 * then reopens `path` in append mode.
 *
 * Returns 0 when rotation succeeded, and also when rotation is not
 * configured (NULL logger, empty path, max_bytes == 0 or max_files <= 0).
 * Returns -1 on any failure. If the stream had already been closed, a
 * recovery reopen is attempted first, but -1 is still returned even when
 * that recovery succeeds.
 */
static int omni_file_logger_rotate_locked(omni_file_logger_t *logger) {
int index;
/* errno captured at the first failing step; 0 means nothing captured yet. */
int saved_errno = 0;
/* Set once the current stream is about to be closed, i.e. once a reopen is needed. */
int should_recover = 0;
/* Where the live file was moved to; empty until that rename step has run. */
char rotated_current_path[PATH_MAX];
char from_path[PATH_MAX];
char to_path[PATH_MAX];
if (logger == NULL || logger->path[0] == '\0' || logger->max_bytes == 0U || logger->max_files <= 0) {
return 0;
}
rotated_current_path[0] = '\0';
if (logger->file != NULL) {
/* A flush failure aborts early: the stream is still open, so no recovery is needed. */
if (omni_file_logger_flush_locked(logger, omni_now_monotonic_ms64()) != 0) {
return -1;
}
should_recover = 1;
if (fclose(logger->file) != 0) {
logger->file = NULL;
saved_errno = errno;
goto recover;
}
logger->file = NULL;
}
/* Drop the oldest generation, path.<max_files>; the unlink result is ignored. */
if (omni_build_rotated_path(from_path, sizeof(from_path), logger->path, logger->max_files) != 0) {
saved_errno = errno;
goto recover;
}
unlink(from_path);
/* Shift generations from the highest index down; a missing generation (ENOENT) is tolerated. */
for (index = logger->max_files - 1; index >= 1; --index) {
if (omni_build_rotated_path(from_path, sizeof(from_path), logger->path, index) != 0 ||
omni_build_rotated_path(to_path, sizeof(to_path), logger->path, index + 1) != 0) {
saved_errno = errno;
goto recover;
}
if (rename(from_path, to_path) != 0 && errno != ENOENT) {
saved_errno = errno;
goto recover;
}
}
/* Move the live file to path.1; ENOENT on the live path is tolerated as well. */
if (omni_build_rotated_path(to_path, sizeof(to_path), logger->path, 1) != 0) {
saved_errno = errno;
goto recover;
}
if (rename(logger->path, to_path) != 0 && errno != ENOENT) {
saved_errno = errno;
goto recover;
}
/* Remember path.1 so the recovery helper can rename it back if the reopen fails. */
snprintf(rotated_current_path, sizeof(rotated_current_path), "%s", to_path);
if (omni_file_logger_reopen_append_locked(logger) != 0) {
saved_errno = errno;
goto recover;
}
return 0;
recover:
if (should_recover) {
/* Keep reporting the original failure even if the recovery calls overwrite errno. */
int recover_errno = saved_errno != 0 ? saved_errno : errno;
if (omni_file_logger_recover_after_rotate_locked(logger, rotated_current_path) == 0) {
errno = recover_errno;
} else if (saved_errno != 0) {
errno = saved_errno;
}
} else if (saved_errno != 0) {
errno = saved_errno;
}
return -1;
}
void omni_file_logger_init(omni_file_logger_t *logger, FILE *file) { void omni_file_logger_init(omni_file_logger_t *logger, FILE *file) {
memset(logger, 0, sizeof(*logger));
logger->file = file; logger->file = file;
pthread_mutex_init(&logger->mutex, NULL); pthread_mutex_init(&logger->mutex, NULL);
logger->flush_bytes = 1U;
logger->flush_interval_ms = 0;
logger->immediate_flush = 1;
logger->last_flush_monotonic_ms = omni_now_monotonic_ms64();
}
void omni_file_logger_init_path(omni_file_logger_t *logger, FILE *file, const char *path, int immediate_flush) {
struct stat st;
omni_file_logger_init(logger, file);
if (path != NULL && path[0] != '\0') {
snprintf(logger->path, sizeof(logger->path), "%s", path);
if (stat(path, &st) == 0) {
logger->current_bytes = (size_t) st.st_size;
}
}
logger->flush_bytes = omni_positive_size_env("BLITZ_JSONL_FLUSH_BYTES", 262144U);
logger->flush_interval_ms = omni_positive_int_env("BLITZ_JSONL_FLUSH_INTERVAL_MS", 1000);
logger->max_bytes = omni_positive_size_env("BLITZ_JSONL_ROTATE_BYTES", 134217728U);
logger->max_files = omni_positive_int_env("BLITZ_JSONL_ROTATE_FILES", 8);
logger->immediate_flush = immediate_flush != 0;
} }
void omni_file_logger_destroy(omni_file_logger_t *logger) { void omni_file_logger_destroy(omni_file_logger_t *logger) {
@@ -555,13 +763,32 @@ void omni_file_logger_destroy(omni_file_logger_t *logger) {
int omni_file_logger_write_line(omni_file_logger_t *logger, const char *line) { int omni_file_logger_write_line(omni_file_logger_t *logger, const char *line) {
int rc = 0; int rc = 0;
size_t line_len;
uint64_t now_ms;
if (logger == NULL || logger->file == NULL || line == NULL) { if (logger == NULL || logger->file == NULL || line == NULL) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
line_len = strlen(line) + 1U;
now_ms = omni_now_monotonic_ms64();
pthread_mutex_lock(&logger->mutex); pthread_mutex_lock(&logger->mutex);
if (fputs(line, logger->file) == EOF || fputc('\n', logger->file) == EOF || fflush(logger->file) != 0) { if (fputs(line, logger->file) == EOF || fputc('\n', logger->file) == EOF) {
rc = -1; rc = -1;
} else {
logger->current_bytes += line_len;
logger->buffered_bytes += line_len;
if (logger->immediate_flush ||
logger->buffered_bytes >= logger->flush_bytes ||
(logger->flush_interval_ms > 0 && now_ms - logger->last_flush_monotonic_ms >= (uint64_t) logger->flush_interval_ms)) {
if (omni_file_logger_flush_locked(logger, now_ms) != 0) {
rc = -1;
}
}
if (rc == 0 && logger->max_bytes > 0U && logger->current_bytes >= logger->max_bytes) {
if (omni_file_logger_rotate_locked(logger) != 0) {
rc = -1;
}
}
} }
pthread_mutex_unlock(&logger->mutex); pthread_mutex_unlock(&logger->mutex);
return rc; return rc;

View File

@@ -18,7 +18,7 @@ tx_timestamp_debug_logger_t *tx_timestamp_debug_open_jsonl(const char *path) {
fclose(file); fclose(file);
return NULL; return NULL;
} }
omni_file_logger_init(&logger->file_logger, file); omni_file_logger_init_path(&logger->file_logger, file, path, 0);
logger->enabled = 1; logger->enabled = 1;
return logger; return logger;
} }

View File

@@ -213,6 +213,8 @@ void video_pipeline_config_init(video_pipeline_config_t *config) {
config->hard_backpressure_hold_ms = VIDEO_HARD_BACKPRESSURE_HOLD_MS_DEFAULT; config->hard_backpressure_hold_ms = VIDEO_HARD_BACKPRESSURE_HOLD_MS_DEFAULT;
config->server_idle_reconnect_ms = VIDEO_DEFAULT_SERVER_IDLE_RECONNECT_MS; config->server_idle_reconnect_ms = VIDEO_DEFAULT_SERVER_IDLE_RECONNECT_MS;
config->frame_stall_reconnect_ms = VIDEO_DEFAULT_FRAME_STALL_RECONNECT_MS; config->frame_stall_reconnect_ms = VIDEO_DEFAULT_FRAME_STALL_RECONNECT_MS;
config->stats_logger = NULL;
config->stats_interval_ms = 1000;
} }
void video_pipeline_config_load_env(video_pipeline_config_t *config) { void video_pipeline_config_load_env(video_pipeline_config_t *config) {
@@ -235,6 +237,7 @@ void video_pipeline_config_load_env(video_pipeline_config_t *config) {
config->hard_backpressure_hold_ms = env_int_or_default("OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS", config->hard_backpressure_hold_ms); config->hard_backpressure_hold_ms = env_int_or_default("OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS", config->hard_backpressure_hold_ms);
config->server_idle_reconnect_ms = env_int_or_default("OMNI_VIDEO_SERVER_IDLE_RECONNECT_MS", config->server_idle_reconnect_ms); config->server_idle_reconnect_ms = env_int_or_default("OMNI_VIDEO_SERVER_IDLE_RECONNECT_MS", config->server_idle_reconnect_ms);
config->frame_stall_reconnect_ms = env_int_or_default("OMNI_VIDEO_FRAME_STALL_RECONNECT_MS", config->frame_stall_reconnect_ms); config->frame_stall_reconnect_ms = env_int_or_default("OMNI_VIDEO_FRAME_STALL_RECONNECT_MS", config->frame_stall_reconnect_ms);
config->stats_interval_ms = env_int_or_default("BLITZ_KCP_STATS_INTERVAL_MS", config->stats_interval_ms);
} }
int video_pipeline_stats_init(video_pipeline_stats_t *stats) { int video_pipeline_stats_init(video_pipeline_stats_t *stats) {
@@ -600,8 +603,8 @@ static int video_sender_init(video_sender_t *sender, const video_pipeline_config
&options, &options,
NULL, NULL,
NULL, NULL,
NULL, config->stats_logger,
KCP_DEFAULT_STATS_INTERVAL_MS config->stats_interval_ms
); );
if (sender->client == NULL) { if (sender->client == NULL) {
return -1; return -1;

View File

@@ -186,6 +186,8 @@ void video_pipeline_config_init(video_pipeline_config_t *config) {
config->output_height = VIDEO_OUTPUT_HEIGHT_DEFAULT; config->output_height = VIDEO_OUTPUT_HEIGHT_DEFAULT;
config->max_frames = 0; config->max_frames = 0;
config->enable_timing_logs = 0; config->enable_timing_logs = 0;
config->stats_logger = NULL;
config->stats_interval_ms = 1000;
} }
void video_pipeline_config_load_env(video_pipeline_config_t *config) { void video_pipeline_config_load_env(video_pipeline_config_t *config) {
@@ -203,6 +205,7 @@ void video_pipeline_config_load_env(video_pipeline_config_t *config) {
config->max_frames = atoi(getenv("OMNI_VIDEO_MAX_FRAMES")); config->max_frames = atoi(getenv("OMNI_VIDEO_MAX_FRAMES"));
} }
config->enable_timing_logs = env_flag_or_default("OMNI_VIDEO_DEBUG_TIMING", config->enable_timing_logs); config->enable_timing_logs = env_flag_or_default("OMNI_VIDEO_DEBUG_TIMING", config->enable_timing_logs);
config->stats_interval_ms = env_int_or_default("BLITZ_KCP_STATS_INTERVAL_MS", config->stats_interval_ms);
} }
int video_pipeline_stats_init(video_pipeline_stats_t *stats) { int video_pipeline_stats_init(video_pipeline_stats_t *stats) {
@@ -564,8 +567,8 @@ static int video_sender_init(video_sender_t *sender, const video_pipeline_config
&options, &options,
NULL, NULL,
NULL, NULL,
NULL, config->stats_logger,
KCP_DEFAULT_STATS_INTERVAL_MS config->stats_interval_ms
); );
if (sender->client == NULL) { if (sender->client == NULL) {
return -1; return -1;