fix: 断联视频堆积问题与控制命令失效问题

This commit is contained in:
2026-04-11 03:55:19 +08:00
parent 6f727dbe57
commit 84e0cc54d2
8 changed files with 381 additions and 9 deletions

View File

@@ -16,6 +16,7 @@
#define CONTROL_DEFAULT_PEER_ID "peer-b-ctrl"
#define CONTROL_DEFAULT_EXPECTED_SENDER "peer-a-ctrl"
#define CONTROL_DEFAULT_UNIX_SOCKET "/tmp/omnisocket-b-side-cmd.sock"
#define CONTROL_DEFAULT_SERVER_IDLE_RECONNECT_MS 3000
typedef struct unix_dgram_client {
int fd;
@@ -31,9 +32,11 @@ typedef struct control_bridge_stats {
uint64_t invalid_packets;
uint64_t unix_send_errors;
uint64_t reconnect_count;
uint32_t server_idle_ms;
int ever_connected;
int registered;
char last_error[256];
char last_reconnect_reason[256];
kcp_runtime_stats_t transport;
} control_bridge_stats_t;
@@ -48,6 +51,7 @@ typedef struct daemon_state {
const char *control_peer_id;
const char *control_expected_sender;
const char *control_unix_socket;
int control_server_idle_reconnect_ms;
unix_dgram_client_t unix_client;
control_bridge_stats_t control_stats;
} daemon_state_t;
@@ -91,6 +95,20 @@ static const char *env_first_nonempty(const char *first, const char *second, con
return fallback;
}
/*
 * Read a strictly positive integer setting from the environment.
 *
 * name:     environment variable to look up.
 * fallback: value returned whenever the variable cannot be used.
 *
 * Returns the parsed value, or `fallback` when the variable is unset, empty,
 * does not start with a decimal number, is zero or negative, or does not fit
 * in an int. Characters following the leading number are ignored, matching
 * the previous atoi()-based behaviour.
 *
 * strtol() replaces atoi() because atoi() has undefined behaviour for
 * out-of-range input; an oversized value could wrap to an arbitrary positive
 * int and be accepted silently.
 */
static int env_int_or_default(const char *name, int fallback) {
    /* Largest int value, derived locally so no additional header is needed. */
    const long int_max = (long) (~0U >> 1);
    const char *value = getenv(name);
    char *end = NULL;
    long parsed;

    if (value == NULL || value[0] == '\0') {
        return fallback;
    }

    /* strtol() only reports overflow through errno, so clear it first. */
    errno = 0;
    parsed = strtol(value, &end, 10);
    if (end == value || errno == ERANGE) {
        /* No digits consumed, or the value does not even fit in a long. */
        return fallback;
    }
    if (parsed <= 0 || parsed > int_max) {
        return fallback;
    }
    return (int) parsed;
}
static int control_bridge_stats_init(control_bridge_stats_t *stats) {
int rc;
if (stats == NULL) {
@@ -124,6 +142,15 @@ static void control_bridge_set_error(control_bridge_stats_t *stats, const char *
pthread_mutex_unlock(&stats->mutex);
}
/*
 * Store `message` as the most recent reconnect reason in `stats`.
 *
 * The write happens while holding stats->mutex. A NULL `message` is stored
 * as an empty string; text longer than the destination array is truncated by
 * snprintf(). Does nothing when `stats` is NULL.
 */
static void control_bridge_set_reconnect_reason(control_bridge_stats_t *stats, const char *message) {
    const char *reason = "";

    if (stats == NULL) {
        return;
    }
    if (message != NULL) {
        reason = message;
    }

    pthread_mutex_lock(&stats->mutex);
    snprintf(stats->last_reconnect_reason, sizeof(stats->last_reconnect_reason), "%s", reason);
    pthread_mutex_unlock(&stats->mutex);
}
static void control_bridge_set_errno_error(control_bridge_stats_t *stats, const char *prefix) {
char buffer[256];
int saved_errno = errno;
@@ -149,12 +176,39 @@ static void control_bridge_stats_snapshot(control_bridge_stats_t *stats, control
out_stats->invalid_packets = stats->invalid_packets;
out_stats->unix_send_errors = stats->unix_send_errors;
out_stats->reconnect_count = stats->reconnect_count;
out_stats->server_idle_ms = stats->server_idle_ms;
out_stats->registered = stats->registered;
snprintf(out_stats->last_error, sizeof(out_stats->last_error), "%s", stats->last_error);
snprintf(out_stats->last_reconnect_reason, sizeof(out_stats->last_reconnect_reason), "%s", stats->last_reconnect_reason);
out_stats->transport = stats->transport;
pthread_mutex_unlock(&stats->mutex);
}
/*
 * Decide whether a server error message should trigger a reconnect.
 *
 * Returns 1 when `message` contains any of the known substrings below
 * (case-sensitive match), otherwise 0. NULL and empty messages return 0.
 */
static int control_server_error_requires_reconnect(const char *message) {
    static const char *const reconnect_markers[] = {
        "not registered",
        "first message must be register",
        "peer replaced",
        "timed out waiting for server_register_ok",
    };
    const size_t marker_count = sizeof(reconnect_markers) / sizeof(reconnect_markers[0]);
    size_t i;

    if (message == NULL || *message == '\0') {
        return 0;
    }

    for (i = 0; i < marker_count; i++) {
        if (strstr(message, reconnect_markers[i]) != NULL) {
            return 1;
        }
    }
    return 0;
}
/*
 * Copy the body of `msg` into `buffer` as a NUL-terminated string.
 *
 * At most buffer_len - 1 bytes are copied, so longer bodies are truncated.
 * `buffer` is left as an empty string when `msg` is NULL, its body is NULL,
 * or its body_len is 0. Nothing is written when `buffer` is NULL or
 * `buffer_len` is 0.
 */
static void control_message_body_to_cstr(const message_t *msg, char *buffer, size_t buffer_len) {
    size_t capacity;
    size_t copy_len;

    if (buffer == NULL || buffer_len == 0) {
        return;
    }

    /* Start from an empty string so every early exit leaves valid output. */
    buffer[0] = '\0';
    if (msg == NULL || msg->body == NULL || msg->body_len == 0) {
        return;
    }

    /* Reserve one byte for the terminator. */
    capacity = buffer_len - 1U;
    copy_len = capacity;
    if (msg->body_len < capacity) {
        copy_len = msg->body_len;
    }

    memcpy(buffer, msg->body, copy_len);
    buffer[copy_len] = '\0';
}
static int unix_dgram_client_init(unix_dgram_client_t *client, const char *dest_path) {
struct sockaddr_un bind_addr;
pid_t pid;
@@ -241,9 +295,14 @@ static void *video_thread_main(void *arg) {
daemon_state_t *state = (daemon_state_t *) arg;
while (!*state->stop_requested) {
if (video_pipeline_run(&state->video_config, &state->video_stats, state->stop_requested) == 0) {
int video_rc = video_pipeline_run(&state->video_config, &state->video_stats, state->stop_requested);
if (video_rc == 0) {
break;
}
if (video_rc == VIDEO_PIPELINE_RUN_RETRY_IMMEDIATE) {
continue;
}
if (!*state->stop_requested) {
sleep(1);
}
@@ -257,6 +316,7 @@ static void *control_thread_main(void *arg) {
while (!*state->stop_requested) {
kcp_conn_options_t options;
kcp_client_t *client = NULL;
int reconnect_immediately = 0;
kcp_conn_options_set_control_defaults(&options);
client = kcp_client_dial_with_options(
@@ -289,7 +349,10 @@ static void *control_thread_main(void *arg) {
state->control_stats.ever_connected = 1;
}
state->control_stats.registered = client_state.registered;
state->control_stats.server_idle_ms = client_state.server_idle_ms;
state->control_stats.last_reconnect_reason[0] = '\0';
snprintf(state->control_stats.last_error, sizeof(state->control_stats.last_error), "%s", client_state.last_server_error);
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex);
}
@@ -301,20 +364,63 @@ static void *control_thread_main(void *arg) {
protocol_message_init(&msg);
rc = kcp_client_receive_timed(client, &msg, 100);
if (rc == 1) {
char reconnect_reason[256];
protocol_message_clear(&msg);
memset(&client_state, 0, sizeof(client_state));
kcp_client_state_snapshot(client, &client_state);
pthread_mutex_lock(&state->control_stats.mutex);
state->control_stats.registered = client_state.registered;
state->control_stats.server_idle_ms = client_state.server_idle_ms;
snprintf(state->control_stats.last_error, sizeof(state->control_stats.last_error), "%s", client_state.last_server_error);
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex);
if (!client_state.registered) {
snprintf(reconnect_reason, sizeof(reconnect_reason), "control session stale: server reported unregistered");
} else if (
state->control_server_idle_reconnect_ms > 0
&& client_state.server_idle_ms >= (uint32_t) state->control_server_idle_reconnect_ms
) {
snprintf(
reconnect_reason,
sizeof(reconnect_reason),
"control session stale: server idle timeout (%u ms >= %d ms)",
client_state.server_idle_ms,
state->control_server_idle_reconnect_ms
);
} else if (control_server_error_requires_reconnect(client_state.last_server_error)) {
snprintf(
reconnect_reason,
sizeof(reconnect_reason),
"control session stale: server error %.180s",
client_state.last_server_error
);
} else {
reconnect_reason[0] = '\0';
}
if (reconnect_reason[0] != '\0') {
control_bridge_set_error(&state->control_stats, reconnect_reason);
control_bridge_set_reconnect_reason(&state->control_stats, reconnect_reason);
fprintf(stderr, "[b_side_omnid] %s\n", reconnect_reason);
reconnect_immediately = 1;
break;
}
continue;
}
if (rc != 0) {
memset(&client_state, 0, sizeof(client_state));
kcp_client_state_snapshot(client, &client_state);
pthread_mutex_lock(&state->control_stats.mutex);
state->control_stats.registered = client_state.registered;
state->control_stats.server_idle_ms = client_state.server_idle_ms;
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex);
if (client_state.last_server_error[0] != '\0') {
control_bridge_set_error(&state->control_stats, client_state.last_server_error);
if (control_server_error_requires_reconnect(client_state.last_server_error)) {
control_bridge_set_reconnect_reason(&state->control_stats, client_state.last_server_error);
reconnect_immediately = 1;
}
} else {
control_bridge_set_errno_error(&state->control_stats, "control receive loop stopped");
}
@@ -323,7 +429,20 @@ static void *control_thread_main(void *arg) {
}
if (msg.type == MSG_TYPE_ERROR && strcmp(msg.from, SERVER_PEER_ID) == 0) {
control_bridge_set_error(&state->control_stats, (const char *) msg.body);
char server_error[256];
control_message_body_to_cstr(&msg, server_error, sizeof(server_error));
control_bridge_set_error(&state->control_stats, server_error);
if (control_server_error_requires_reconnect(server_error)) {
char reconnect_reason[256];
snprintf(reconnect_reason, sizeof(reconnect_reason), "control session stale: server error %.180s", server_error);
control_bridge_set_reconnect_reason(&state->control_stats, reconnect_reason);
fprintf(stderr, "[b_side_omnid] %s\n", reconnect_reason);
reconnect_immediately = 1;
protocol_message_clear(&msg);
break;
}
protocol_message_clear(&msg);
continue;
}
@@ -351,8 +470,12 @@ static void *control_thread_main(void *arg) {
recovered = unix_dgram_client_send(&state->unix_client, msg.body, msg.body_len) == 0;
}
if (recovered) {
memset(&client_state, 0, sizeof(client_state));
kcp_client_state_snapshot(client, &client_state);
pthread_mutex_lock(&state->control_stats.mutex);
state->control_stats.packets_forwarded += 1;
state->control_stats.registered = client_state.registered;
state->control_stats.server_idle_ms = client_state.server_idle_ms;
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex);
protocol_message_clear(&msg);
@@ -367,8 +490,12 @@ static void *control_thread_main(void *arg) {
continue;
}
memset(&client_state, 0, sizeof(client_state));
kcp_client_state_snapshot(client, &client_state);
pthread_mutex_lock(&state->control_stats.mutex);
state->control_stats.packets_forwarded += 1;
state->control_stats.registered = client_state.registered;
state->control_stats.server_idle_ms = client_state.server_idle_ms;
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
pthread_mutex_unlock(&state->control_stats.mutex);
protocol_message_clear(&msg);
@@ -376,10 +503,11 @@ static void *control_thread_main(void *arg) {
pthread_mutex_lock(&state->control_stats.mutex);
state->control_stats.registered = 0;
state->control_stats.server_idle_ms = 0;
pthread_mutex_unlock(&state->control_stats.mutex);
kcp_client_close(client);
kcp_client_free(client);
if (!*state->stop_requested) {
if (!*state->stop_requested && !reconnect_immediately) {
sleep(1);
}
}
@@ -398,17 +526,23 @@ static void print_stats(daemon_state_t *state) {
fprintf(
stderr,
"[b_side_omnid] video registered=%d frames=%llu bytes=%llu srtt=%dms | control registered=%d reconnects=%llu forwarded=%llu invalid=%llu unix_err=%llu srtt=%dms\n",
"[b_side_omnid] video registered=%d frames=%llu bytes=%llu drops=%llu resets=%llu backlog=%u reason=%s srtt=%dms | control registered=%d idle=%ums reconnects=%llu forwarded=%llu invalid=%llu unix_err=%llu srtt=%dms last_reconnect=%s\n",
video_stats.connected,
(unsigned long long) video_stats.frames_sent,
(unsigned long long) video_stats.bytes_sent,
(unsigned long long) video_stats.backpressure_drops,
(unsigned long long) video_stats.backlog_resets,
video_stats.last_backlog_segments,
video_stats.last_backlog_reason[0] == '\0' ? "-" : video_stats.last_backlog_reason,
video_stats.transport.srtt_ms,
control_stats.registered,
control_stats.server_idle_ms,
(unsigned long long) control_stats.reconnect_count,
(unsigned long long) control_stats.packets_forwarded,
(unsigned long long) control_stats.invalid_packets,
(unsigned long long) control_stats.unix_send_errors,
control_stats.transport.srtt_ms
control_stats.transport.srtt_ms,
control_stats.last_reconnect_reason[0] == '\0' ? "-" : control_stats.last_reconnect_reason
);
}
@@ -429,6 +563,10 @@ int main(void) {
state.control_peer_id = env_or_default("OMNI_CONTROL_PEER_ID", CONTROL_DEFAULT_PEER_ID);
state.control_expected_sender = env_or_default("OMNI_CONTROL_EXPECTED_SENDER", CONTROL_DEFAULT_EXPECTED_SENDER);
state.control_unix_socket = env_or_default("OMNI_CONTROL_UNIX_SOCKET_PATH", CONTROL_DEFAULT_UNIX_SOCKET);
state.control_server_idle_reconnect_ms = env_int_or_default(
"OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS",
CONTROL_DEFAULT_SERVER_IDLE_RECONNECT_MS
);
if (state.video_config.server_addr == NULL || state.video_config.server_addr[0] == '\0' ||
state.control_server_addr == NULL || state.control_server_addr[0] == '\0') {