feat: 增加日志模块

This commit is contained in:
2026-04-18 12:52:39 +08:00
parent b700dab484
commit 212459a8e4
18 changed files with 938 additions and 42 deletions

View File

@@ -20,6 +20,8 @@
#define CONTROL_DEFAULT_PEER_ID "peer-b-ctrl"
#define CONTROL_DEFAULT_EXPECTED_SENDER "peer-a-ctrl"
#define CONTROL_ACK_DEFAULT_PEER_ID "peer-b-ctrl-ack"
#define CONTROL_ACK_DEFAULT_TARGET_PEER "peer-a-ctrl-ack"
#define CONTROL_DEFAULT_UNIX_SOCKET "/tmp/omnisocket-b-side-cmd.sock"
#define CONTROL_DEFAULT_SERVER_IDLE_RECONNECT_MS 3000
#define DEFAULT_RUNTIME_DIR "/run/blitz-robot"
@@ -29,6 +31,7 @@
#define DEFAULT_THREAD_HEARTBEAT_TIMEOUT_SEC 15
#define DEFAULT_KCP_STATS_INTERVAL_MS 1000
#define DEFAULT_CONTROL_LATENCY_SAMPLE_MOD 100
#define DEFAULT_CONTROL_ACK_SAMPLE_MOD 10
#define EXIT_CODE_VIDEO_THREAD_STALLED 101
#define EXIT_CODE_CONTROL_THREAD_STALLED 102
@@ -64,21 +67,32 @@ typedef struct daemon_state {
const char *control_bind_device;
const char *control_peer_id;
const char *control_expected_sender;
const char *control_ack_peer_id;
const char *control_ack_target_peer;
const char *control_unix_socket;
int control_server_idle_reconnect_ms;
const char *runtime_dir;
int heartbeat_timeout_sec;
int stats_interval_ms;
uint64_t control_latency_sample_mod;
uint64_t control_ack_sample_mod;
char status_file_path[512];
char video_thread_fault_file[512];
char control_thread_fault_file[512];
atomic_long video_thread_heartbeat_epoch_sec;
atomic_long control_thread_heartbeat_epoch_sec;
atomic_int control_ack_shutdown_requested;
kcp_session_stats_logger_t *stats_logger;
latency_logger_t *control_latency_logger;
video_stage_logger_t *video_stage_logger;
unix_dgram_client_t unix_client;
control_bridge_stats_t control_stats;
pthread_mutex_t control_ack_mutex;
pthread_t control_ack_thread;
kcp_client_t *control_ack_client;
int control_ack_thread_started;
int control_ack_connect_requested;
int control_ack_connect_inflight;
} daemon_state_t;
static volatile sig_atomic_t g_stop_requested = 0;
@@ -180,6 +194,19 @@ static int should_log_control_latency(const daemon_state_t *state, const message
return msg->id % sample_mod == 0U;
}
static int should_send_control_ack(const daemon_state_t *state, const message_t *msg) {
uint64_t sample_mod;
if (state == NULL || msg == NULL) {
return 0;
}
sample_mod = state->control_ack_sample_mod;
if (sample_mod <= 1U) {
return 1;
}
return msg->id % sample_mod == 0U;
}
static void video_pipeline_heartbeat_progress(void *context) {
update_thread_heartbeat((atomic_long *) context);
}
@@ -259,6 +286,62 @@ static void control_bridge_stats_destroy(control_bridge_stats_t *stats) {
static void unix_dgram_client_close(unix_dgram_client_t *client);
static void control_bridge_stats_snapshot(control_bridge_stats_t *stats, control_bridge_stats_t *out_stats);
static void close_control_ack_client(kcp_client_t **client_ptr);
static int control_ack_enabled(const daemon_state_t *state) {
return state != NULL
&& state->control_ack_peer_id != NULL
&& state->control_ack_peer_id[0] != '\0'
&& state->control_ack_target_peer != NULL
&& state->control_ack_target_peer[0] != '\0';
}
static int control_ack_manager_init(daemon_state_t *state) {
int rc;
if (state == NULL) {
errno = EINVAL;
return -1;
}
rc = pthread_mutex_init(&state->control_ack_mutex, NULL);
if (rc != 0) {
errno = rc;
return -1;
}
atomic_init(&state->control_ack_shutdown_requested, 0);
state->control_ack_client = NULL;
state->control_ack_thread_started = 0;
state->control_ack_connect_requested = 0;
state->control_ack_connect_inflight = 0;
return 0;
}
static void control_ack_manager_reset(daemon_state_t *state, int request_connect) {
kcp_client_t *client = NULL;
if (state == NULL) {
return;
}
pthread_mutex_lock(&state->control_ack_mutex);
client = state->control_ack_client;
state->control_ack_client = NULL;
state->control_ack_connect_requested = request_connect && control_ack_enabled(state) && state->control_ack_thread_started;
pthread_mutex_unlock(&state->control_ack_mutex);
close_control_ack_client(&client);
}
static void control_ack_manager_destroy(daemon_state_t *state) {
if (state == NULL) {
return;
}
atomic_store(&state->control_ack_shutdown_requested, 1);
if (state->control_ack_thread_started) {
pthread_join(state->control_ack_thread, NULL);
state->control_ack_thread_started = 0;
}
control_ack_manager_reset(state, 0);
pthread_mutex_destroy(&state->control_ack_mutex);
}
static int write_status_json_atomic(const char *path, cJSON *root) {
char *json;
@@ -348,6 +431,8 @@ static int write_daemon_status_file(daemon_state_t *state) {
cJSON_AddNumberToObject(root, "video_frames_sent", (double) video_stats.frames_sent);
cJSON_AddNumberToObject(root, "video_send_errors", (double) video_stats.send_errors);
cJSON_AddNumberToObject(root, "video_backlog_resets", (double) video_stats.backlog_resets);
cJSON_AddNumberToObject(root, "video_last_capture_to_send_ms", (double) video_stats.last_capture_to_send_ms);
cJSON_AddNumberToObject(root, "video_avg_capture_to_send_ms", video_stats.avg_capture_to_send_ms);
cJSON_AddStringToObject(root, "video_last_error", video_stats.last_error);
cJSON_AddBoolToObject(root, "control_registered", control_stats.registered != 0);
cJSON_AddNumberToObject(root, "control_reconnect_count", (double) control_stats.reconnect_count);
@@ -467,6 +552,147 @@ static void control_message_body_to_cstr(const message_t *msg, char *buffer, siz
buffer[copy_len] = '\0';
}
static kcp_client_t *connect_control_ack_client(const daemon_state_t *state) {
kcp_conn_options_t options;
if (state == NULL || state->control_ack_peer_id == NULL || state->control_ack_peer_id[0] == '\0') {
errno = EINVAL;
return NULL;
}
kcp_conn_options_set_control_defaults(&options);
return kcp_client_dial_with_options(
state->control_server_addr,
state->control_relay_via,
state->control_ack_peer_id,
state->control_bind_ip,
state->control_bind_device,
&options,
NULL,
NULL,
state->stats_logger,
state->stats_interval_ms
);
}
static void close_control_ack_client(kcp_client_t **client_ptr) {
if (client_ptr == NULL || *client_ptr == NULL) {
return;
}
kcp_client_close(*client_ptr);
kcp_client_free(*client_ptr);
*client_ptr = NULL;
}
static void control_ack_manager_request_connect(daemon_state_t *state) {
if (state == NULL || !control_ack_enabled(state) || !state->control_ack_thread_started) {
return;
}
pthread_mutex_lock(&state->control_ack_mutex);
if (state->control_ack_client == NULL) {
state->control_ack_connect_requested = 1;
}
pthread_mutex_unlock(&state->control_ack_mutex);
}
static void *control_ack_thread_main(void *arg) {
daemon_state_t *state = (daemon_state_t *) arg;
while (!atomic_load(&state->control_ack_shutdown_requested) && !*state->stop_requested) {
kcp_client_t *client = NULL;
int connect_failed = 0;
int should_connect = 0;
pthread_mutex_lock(&state->control_ack_mutex);
if (state->control_ack_connect_requested && state->control_ack_client == NULL && !state->control_ack_connect_inflight) {
state->control_ack_connect_inflight = 1;
should_connect = 1;
}
pthread_mutex_unlock(&state->control_ack_mutex);
if (!should_connect) {
usleep(200000);
continue;
}
client = connect_control_ack_client(state);
connect_failed = client == NULL;
pthread_mutex_lock(&state->control_ack_mutex);
state->control_ack_connect_inflight = 0;
if (
client != NULL
&& state->control_ack_connect_requested
&& state->control_ack_client == NULL
&& !atomic_load(&state->control_ack_shutdown_requested)
&& !*state->stop_requested
) {
state->control_ack_client = client;
state->control_ack_connect_requested = 0;
client = NULL;
}
pthread_mutex_unlock(&state->control_ack_mutex);
if (client != NULL) {
close_control_ack_client(&client);
}
if (connect_failed && !atomic_load(&state->control_ack_shutdown_requested) && !*state->stop_requested) {
sleep(1);
}
}
return NULL;
}
static void maybe_send_control_ack(
daemon_state_t *state,
const message_t *msg,
int64_t recv_unix_nano,
int64_t persist_end_unix_nano,
const char *sample_reason
) {
kcp_client_t *ack_client = NULL;
kcp_client_t *client_to_close = NULL;
char *payload = NULL;
int send_rc = -1;
if (
state == NULL || msg == NULL || recv_unix_nano <= 0 || persist_end_unix_nano <= recv_unix_nano
|| !control_ack_enabled(state) || !state->control_ack_thread_started
) {
return;
}
payload = omni_strdup_printf(
"{\"message_id\":%" PRIu64 ",\"ack_phase\":\"persist_end\",\"b_recv_to_persist_us\":%" PRId64 ",\"unix_send_ok\":true,\"sample_reason\":\"%s\"}",
msg->id,
(persist_end_unix_nano - recv_unix_nano) / 1000,
sample_reason == NULL ? "sample_mod" : sample_reason
);
if (payload == NULL) {
return;
}
pthread_mutex_lock(&state->control_ack_mutex);
ack_client = state->control_ack_client;
if (ack_client == NULL) {
state->control_ack_connect_requested = 1;
pthread_mutex_unlock(&state->control_ack_mutex);
free(payload);
return;
}
send_rc = kcp_client_send_text(ack_client, state->control_ack_target_peer, payload);
if (send_rc != 0) {
client_to_close = state->control_ack_client;
state->control_ack_client = NULL;
state->control_ack_connect_requested = 1;
}
pthread_mutex_unlock(&state->control_ack_mutex);
free(payload);
if (client_to_close != NULL) {
close_control_ack_client(&client_to_close);
}
}
static int unix_dgram_client_init(unix_dgram_client_t *client, const char *dest_path) {
struct sockaddr_un bind_addr;
pid_t pid;
@@ -618,11 +844,17 @@ static void *control_thread_main(void *arg) {
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex);
}
control_ack_manager_request_connect(state);
while (!*state->stop_requested) {
message_t msg;
int rc;
kcp_client_state_t client_state;
int ack_sampled = 0;
int log_control_latency = 0;
int64_t recv_unix_nano = 0;
int64_t persist_begin_unix_nano = 0;
int64_t persist_end_unix_nano = 0;
update_thread_heartbeat(&state->control_thread_heartbeat_epoch_sec);
protocol_message_init(&msg);
@@ -727,9 +959,27 @@ static void *control_thread_main(void *arg) {
continue;
}
if (should_log_control_latency(state, &msg)) {
latencylog_log_message_event(state->control_latency_logger, OMNI_NODE_ROLE_PEER, state->control_peer_id, EVENT_B_APP_RECV, &msg);
latencylog_log_message_event(state->control_latency_logger, OMNI_NODE_ROLE_PEER, state->control_peer_id, EVENT_B_PERSIST_BEGIN, &msg);
ack_sampled = should_send_control_ack(state, &msg);
log_control_latency = ack_sampled || should_log_control_latency(state, &msg);
if (log_control_latency) {
recv_unix_nano = omni_now_unix_nano();
persist_begin_unix_nano = recv_unix_nano;
latencylog_log_message_event_at(
state->control_latency_logger,
OMNI_NODE_ROLE_PEER,
state->control_peer_id,
EVENT_B_APP_RECV,
recv_unix_nano,
&msg
);
latencylog_log_message_event_at(
state->control_latency_logger,
OMNI_NODE_ROLE_PEER,
state->control_peer_id,
EVENT_B_PERSIST_BEGIN,
persist_begin_unix_nano,
&msg
);
}
if (unix_dgram_client_send(&state->unix_client, msg.body, msg.body_len) != 0) {
@@ -748,8 +998,19 @@ static void *control_thread_main(void *arg) {
state->control_stats.server_idle_ms = client_state.server_idle_ms;
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex);
if (should_log_control_latency(state, &msg)) {
latencylog_log_message_event(state->control_latency_logger, OMNI_NODE_ROLE_PEER, state->control_peer_id, EVENT_B_PERSIST_END, &msg);
if (log_control_latency) {
persist_end_unix_nano = omni_now_unix_nano();
latencylog_log_message_event_at(
state->control_latency_logger,
OMNI_NODE_ROLE_PEER,
state->control_peer_id,
EVENT_B_PERSIST_END,
persist_end_unix_nano,
&msg
);
}
if (ack_sampled) {
maybe_send_control_ack(state, &msg, recv_unix_nano, persist_end_unix_nano, "sample_mod");
}
protocol_message_clear(&msg);
continue;
@@ -771,8 +1032,19 @@ static void *control_thread_main(void *arg) {
state->control_stats.server_idle_ms = client_state.server_idle_ms;
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex);
if (should_log_control_latency(state, &msg)) {
latencylog_log_message_event(state->control_latency_logger, OMNI_NODE_ROLE_PEER, state->control_peer_id, EVENT_B_PERSIST_END, &msg);
if (log_control_latency) {
persist_end_unix_nano = omni_now_unix_nano();
latencylog_log_message_event_at(
state->control_latency_logger,
OMNI_NODE_ROLE_PEER,
state->control_peer_id,
EVENT_B_PERSIST_END,
persist_end_unix_nano,
&msg
);
}
if (ack_sampled) {
maybe_send_control_ack(state, &msg, recv_unix_nano, persist_end_unix_nano, "sample_mod");
}
protocol_message_clear(&msg);
}
@@ -781,6 +1053,7 @@ static void *control_thread_main(void *arg) {
state->control_stats.registered = 0;
state->control_stats.server_idle_ms = 0;
pthread_mutex_unlock(&state->control_stats.mutex);
control_ack_manager_reset(state, 0);
kcp_client_close(client);
kcp_client_free(client);
if (!*state->stop_requested && !reconnect_immediately) {
@@ -802,13 +1075,15 @@ static void print_stats(daemon_state_t *state) {
fprintf(
stderr,
"[b_side_omnid] video registered=%d frames=%llu bytes=%llu drops=%llu resets=%llu backlog=%u reason=%s srtt=%dms | control registered=%d idle=%ums reconnects=%llu forwarded=%llu invalid=%llu unix_err=%llu srtt=%dms last_reconnect=%s\n",
"[b_side_omnid] video registered=%d frames=%llu bytes=%llu drops=%llu resets=%llu backlog=%u cap2send=%ums avg=%.1fms reason=%s srtt=%dms | control registered=%d idle=%ums reconnects=%llu forwarded=%llu invalid=%llu unix_err=%llu srtt=%dms last_reconnect=%s\n",
video_stats.connected,
(unsigned long long) video_stats.frames_sent,
(unsigned long long) video_stats.bytes_sent,
(unsigned long long) video_stats.backpressure_drops,
(unsigned long long) video_stats.backlog_resets,
video_stats.last_backlog_segments,
video_stats.last_capture_to_send_ms,
video_stats.avg_capture_to_send_ms,
video_stats.last_backlog_reason[0] == '\0' ? "-" : video_stats.last_backlog_reason,
video_stats.transport.srtt_ms,
control_stats.registered,
@@ -839,6 +1114,8 @@ int main(void) {
state.control_bind_device = env_first_nonempty("OMNI_CONTROL_BIND_DEVICE", "OMNISOCKET_BIND_DEVICE", "");
state.control_peer_id = env_or_default("OMNI_CONTROL_PEER_ID", CONTROL_DEFAULT_PEER_ID);
state.control_expected_sender = env_or_default("OMNI_CONTROL_EXPECTED_SENDER", CONTROL_DEFAULT_EXPECTED_SENDER);
state.control_ack_peer_id = env_or_default("OMNI_CONTROL_ACK_PEER_ID", CONTROL_ACK_DEFAULT_PEER_ID);
state.control_ack_target_peer = env_or_default("OMNI_CONTROL_ACK_TARGET_PEER", CONTROL_ACK_DEFAULT_TARGET_PEER);
state.control_unix_socket = env_or_default("OMNI_CONTROL_UNIX_SOCKET_PATH", CONTROL_DEFAULT_UNIX_SOCKET);
state.runtime_dir = env_or_default("BLITZ_RUNTIME_DIR", DEFAULT_RUNTIME_DIR);
state.heartbeat_timeout_sec = env_int_or_default(
@@ -847,9 +1124,11 @@ int main(void) {
);
state.stats_interval_ms = env_int_or_default("BLITZ_KCP_STATS_INTERVAL_MS", DEFAULT_KCP_STATS_INTERVAL_MS);
state.control_latency_sample_mod = env_u64_or_default("BLITZ_CONTROL_LATENCY_LOG_SAMPLE_MOD", DEFAULT_CONTROL_LATENCY_SAMPLE_MOD);
state.control_ack_sample_mod = env_u64_or_default("BLITZ_CONTROL_ACK_SAMPLE_MOD", DEFAULT_CONTROL_ACK_SAMPLE_MOD);
state.video_config.progress_callback = video_pipeline_heartbeat_progress;
state.video_config.progress_context = &state.video_thread_heartbeat_epoch_sec;
state.video_config.stats_logger = NULL;
state.video_config.stage_logger = NULL;
state.video_config.stats_interval_ms = state.stats_interval_ms;
state.control_server_idle_reconnect_ms = env_int_or_default(
"OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS",
@@ -889,8 +1168,15 @@ int main(void) {
video_pipeline_stats_destroy(&state.video_stats);
return 1;
}
if (control_ack_manager_init(&state) != 0) {
perror("control_ack_manager_init");
control_bridge_stats_destroy(&state.control_stats);
video_pipeline_stats_destroy(&state.video_stats);
return 1;
}
if (unix_dgram_client_init(&state.unix_client, state.control_unix_socket) != 0) {
perror("unix_dgram_client_init");
control_ack_manager_destroy(&state);
control_bridge_stats_destroy(&state.control_stats);
video_pipeline_stats_destroy(&state.video_stats);
return 1;
@@ -905,6 +1191,7 @@ int main(void) {
if (install_signal_handler(SIGINT) != 0 || install_signal_handler(SIGTERM) != 0) {
perror("install_signal_handler");
unix_dgram_client_close(&state.unix_client);
control_ack_manager_destroy(&state);
control_bridge_stats_destroy(&state.control_stats);
video_pipeline_stats_destroy(&state.video_stats);
return 1;
@@ -913,7 +1200,10 @@ int main(void) {
{
const char *stats_log_path = getenv("BLITZ_KCP_STATS_LOG_PATH");
const char *latency_log_path = getenv("BLITZ_CONTROL_LATENCY_LOG_PATH");
const char *video_stage_log_path = getenv("BLITZ_VIDEO_STAGE_LOG_PATH");
int latency_enabled = env_int_or_default("BLITZ_CONTROL_LATENCY_LOG_ENABLED", 1);
int video_stage_log_enabled = env_int_or_default("BLITZ_VIDEO_STAGE_LOG_ENABLED", 1);
uint64_t video_stage_log_sample_mod = env_u64_or_default("BLITZ_VIDEO_STAGE_LOG_SAMPLE_MOD", 10);
if (stats_log_path != NULL && stats_log_path[0] != '\0') {
state.stats_logger = kcp_session_stats_open_jsonl(stats_log_path);
@@ -927,16 +1217,33 @@ int main(void) {
fprintf(stderr, "[b_side_omnid] warning: failed to open control latency log %s\n", latency_log_path);
}
}
if (video_stage_log_enabled && video_stage_log_path != NULL && video_stage_log_path[0] != '\0') {
state.video_stage_logger = video_stage_logger_open_jsonl(video_stage_log_path, video_stage_log_sample_mod);
if (state.video_stage_logger == NULL) {
fprintf(stderr, "[b_side_omnid] warning: failed to open video stage log %s\n", video_stage_log_path);
}
}
state.video_config.stats_logger = state.stats_logger;
state.video_config.stage_logger = state.video_stage_logger;
state.video_config.stats_interval_ms = state.stats_interval_ms;
}
if (control_ack_enabled(&state)) {
if (pthread_create(&state.control_ack_thread, NULL, control_ack_thread_main, &state) != 0) {
fprintf(stderr, "[b_side_omnid] warning: failed to start async control ACK manager, ACK sampling disabled\n");
} else {
state.control_ack_thread_started = 1;
}
}
if (pthread_create(&video_thread, NULL, video_thread_main, &state) != 0) {
perror("pthread_create(video_thread)");
unix_dgram_client_close(&state.unix_client);
control_ack_manager_destroy(&state);
control_bridge_stats_destroy(&state.control_stats);
video_pipeline_stats_destroy(&state.video_stats);
latencylog_close(state.control_latency_logger);
video_stage_logger_close(state.video_stage_logger);
kcp_session_stats_close(state.stats_logger);
return 1;
}
@@ -945,9 +1252,11 @@ int main(void) {
g_stop_requested = 1;
pthread_join(video_thread, NULL);
unix_dgram_client_close(&state.unix_client);
control_ack_manager_destroy(&state);
control_bridge_stats_destroy(&state.control_stats);
video_pipeline_stats_destroy(&state.video_stats);
latencylog_close(state.control_latency_logger);
video_stage_logger_close(state.video_stage_logger);
kcp_session_stats_close(state.stats_logger);
return 1;
}
@@ -964,9 +1273,11 @@ int main(void) {
pthread_join(video_thread, NULL);
pthread_join(control_thread, NULL);
unix_dgram_client_close(&state.unix_client);
control_ack_manager_destroy(&state);
control_bridge_stats_destroy(&state.control_stats);
video_pipeline_stats_destroy(&state.video_stats);
latencylog_close(state.control_latency_logger);
video_stage_logger_close(state.video_stage_logger);
kcp_session_stats_close(state.stats_logger);
return 0;
}