fix: 断联视频堆积问题与控制命令失效问题
This commit is contained in:
@@ -16,6 +16,7 @@
|
||||
#define CONTROL_DEFAULT_PEER_ID "peer-b-ctrl"
|
||||
#define CONTROL_DEFAULT_EXPECTED_SENDER "peer-a-ctrl"
|
||||
#define CONTROL_DEFAULT_UNIX_SOCKET "/tmp/omnisocket-b-side-cmd.sock"
|
||||
#define CONTROL_DEFAULT_SERVER_IDLE_RECONNECT_MS 3000
|
||||
|
||||
typedef struct unix_dgram_client {
|
||||
int fd;
|
||||
@@ -31,9 +32,11 @@ typedef struct control_bridge_stats {
|
||||
uint64_t invalid_packets;
|
||||
uint64_t unix_send_errors;
|
||||
uint64_t reconnect_count;
|
||||
uint32_t server_idle_ms;
|
||||
int ever_connected;
|
||||
int registered;
|
||||
char last_error[256];
|
||||
char last_reconnect_reason[256];
|
||||
kcp_runtime_stats_t transport;
|
||||
} control_bridge_stats_t;
|
||||
|
||||
@@ -48,6 +51,7 @@ typedef struct daemon_state {
|
||||
const char *control_peer_id;
|
||||
const char *control_expected_sender;
|
||||
const char *control_unix_socket;
|
||||
int control_server_idle_reconnect_ms;
|
||||
unix_dgram_client_t unix_client;
|
||||
control_bridge_stats_t control_stats;
|
||||
} daemon_state_t;
|
||||
@@ -91,6 +95,20 @@ static const char *env_first_nonempty(const char *first, const char *second, con
|
||||
return fallback;
|
||||
}
|
||||
|
||||
/*
 * Read a strictly positive integer from the environment variable `name`.
 *
 * Returns `fallback` when the variable is unset, empty, not a complete
 * base-10 number (for example "abc" or "3000ms"), zero or negative, or too
 * large to be returned as an int. Otherwise returns the parsed value.
 *
 * Note: resets errno as part of overflow detection.
 */
static int env_int_or_default(const char *name, int fallback) {
    const char *value = getenv(name);
    char *end = NULL;
    long parsed;

    if (value == NULL || value[0] == '\0') {
        return fallback;
    }

    /*
     * strtol rather than atoi: atoi cannot signal a parse failure and has
     * undefined behavior when the value does not fit in an int.
     */
    errno = 0;
    parsed = strtol(value, &end, 10);
    if (end == value || *end != '\0') {
        /* No digits consumed, or trailing characters after the number. */
        return fallback;
    }
    if (errno == ERANGE) {
        /* Value does not even fit in a long. */
        return fallback;
    }
    /* Upper bound assumes int is at least 32 bits wide. */
    if (parsed <= 0 || parsed > (long) INT32_MAX) {
        return fallback;
    }
    return (int) parsed;
}
|
||||
|
||||
static int control_bridge_stats_init(control_bridge_stats_t *stats) {
|
||||
int rc;
|
||||
if (stats == NULL) {
|
||||
@@ -124,6 +142,15 @@ static void control_bridge_set_error(control_bridge_stats_t *stats, const char *
|
||||
pthread_mutex_unlock(&stats->mutex);
|
||||
}
|
||||
|
||||
/*
 * Store `message` as the most recent reconnect reason in `stats`.
 *
 * The copy into stats->last_reconnect_reason is made while holding
 * stats->mutex and is truncated to the size of that field. A NULL `message`
 * stores an empty string; a NULL `stats` makes the call a no-op.
 */
static void control_bridge_set_reconnect_reason(control_bridge_stats_t *stats, const char *message) {
    const char *reason = "";

    if (!stats) {
        return;
    }
    if (message != NULL) {
        reason = message;
    }

    pthread_mutex_lock(&stats->mutex);
    snprintf(stats->last_reconnect_reason, sizeof stats->last_reconnect_reason, "%s", reason);
    pthread_mutex_unlock(&stats->mutex);
}
|
||||
|
||||
static void control_bridge_set_errno_error(control_bridge_stats_t *stats, const char *prefix) {
|
||||
char buffer[256];
|
||||
int saved_errno = errno;
|
||||
@@ -149,12 +176,39 @@ static void control_bridge_stats_snapshot(control_bridge_stats_t *stats, control
|
||||
out_stats->invalid_packets = stats->invalid_packets;
|
||||
out_stats->unix_send_errors = stats->unix_send_errors;
|
||||
out_stats->reconnect_count = stats->reconnect_count;
|
||||
out_stats->server_idle_ms = stats->server_idle_ms;
|
||||
out_stats->registered = stats->registered;
|
||||
snprintf(out_stats->last_error, sizeof(out_stats->last_error), "%s", stats->last_error);
|
||||
snprintf(out_stats->last_reconnect_reason, sizeof(out_stats->last_reconnect_reason), "%s", stats->last_reconnect_reason);
|
||||
out_stats->transport = stats->transport;
|
||||
pthread_mutex_unlock(&stats->mutex);
|
||||
}
|
||||
|
||||
/*
 * Decide whether a server error text means the control session must be
 * re-established.
 *
 * Returns 1 when `message` contains any of the known marker substrings,
 * and 0 otherwise (including for a NULL or empty message).
 */
static int control_server_error_requires_reconnect(const char *message) {
    static const char *const markers[] = {
        "not registered",
        "first message must be register",
        "peer replaced",
        "timed out waiting for server_register_ok",
    };
    size_t idx;

    if (!message || *message == '\0') {
        return 0;
    }

    for (idx = 0; idx < sizeof markers / sizeof markers[0]; idx++) {
        if (strstr(message, markers[idx]) != NULL) {
            return 1;
        }
    }
    return 0;
}
|
||||
|
||||
static void control_message_body_to_cstr(const message_t *msg, char *buffer, size_t buffer_len) {
|
||||
size_t copy_len;
|
||||
|
||||
if (buffer == NULL || buffer_len == 0) {
|
||||
return;
|
||||
}
|
||||
buffer[0] = '\0';
|
||||
if (msg == NULL || msg->body == NULL || msg->body_len == 0) {
|
||||
return;
|
||||
}
|
||||
copy_len = msg->body_len < (buffer_len - 1U) ? msg->body_len : (buffer_len - 1U);
|
||||
memcpy(buffer, msg->body, copy_len);
|
||||
buffer[copy_len] = '\0';
|
||||
}
|
||||
|
||||
static int unix_dgram_client_init(unix_dgram_client_t *client, const char *dest_path) {
|
||||
struct sockaddr_un bind_addr;
|
||||
pid_t pid;
|
||||
@@ -241,9 +295,14 @@ static void *video_thread_main(void *arg) {
|
||||
daemon_state_t *state = (daemon_state_t *) arg;
|
||||
|
||||
while (!*state->stop_requested) {
|
||||
if (video_pipeline_run(&state->video_config, &state->video_stats, state->stop_requested) == 0) {
|
||||
int video_rc = video_pipeline_run(&state->video_config, &state->video_stats, state->stop_requested);
|
||||
|
||||
if (video_rc == 0) {
|
||||
break;
|
||||
}
|
||||
if (video_rc == VIDEO_PIPELINE_RUN_RETRY_IMMEDIATE) {
|
||||
continue;
|
||||
}
|
||||
if (!*state->stop_requested) {
|
||||
sleep(1);
|
||||
}
|
||||
@@ -257,6 +316,7 @@ static void *control_thread_main(void *arg) {
|
||||
while (!*state->stop_requested) {
|
||||
kcp_conn_options_t options;
|
||||
kcp_client_t *client = NULL;
|
||||
int reconnect_immediately = 0;
|
||||
|
||||
kcp_conn_options_set_control_defaults(&options);
|
||||
client = kcp_client_dial_with_options(
|
||||
@@ -289,7 +349,10 @@ static void *control_thread_main(void *arg) {
|
||||
state->control_stats.ever_connected = 1;
|
||||
}
|
||||
state->control_stats.registered = client_state.registered;
|
||||
state->control_stats.server_idle_ms = client_state.server_idle_ms;
|
||||
state->control_stats.last_reconnect_reason[0] = '\0';
|
||||
snprintf(state->control_stats.last_error, sizeof(state->control_stats.last_error), "%s", client_state.last_server_error);
|
||||
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
|
||||
pthread_mutex_unlock(&state->control_stats.mutex);
|
||||
}
|
||||
|
||||
@@ -301,20 +364,63 @@ static void *control_thread_main(void *arg) {
|
||||
protocol_message_init(&msg);
|
||||
rc = kcp_client_receive_timed(client, &msg, 100);
|
||||
if (rc == 1) {
|
||||
char reconnect_reason[256];
|
||||
|
||||
protocol_message_clear(&msg);
|
||||
memset(&client_state, 0, sizeof(client_state));
|
||||
kcp_client_state_snapshot(client, &client_state);
|
||||
pthread_mutex_lock(&state->control_stats.mutex);
|
||||
state->control_stats.registered = client_state.registered;
|
||||
state->control_stats.server_idle_ms = client_state.server_idle_ms;
|
||||
snprintf(state->control_stats.last_error, sizeof(state->control_stats.last_error), "%s", client_state.last_server_error);
|
||||
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
|
||||
pthread_mutex_unlock(&state->control_stats.mutex);
|
||||
if (!client_state.registered) {
|
||||
snprintf(reconnect_reason, sizeof(reconnect_reason), "control session stale: server reported unregistered");
|
||||
} else if (
|
||||
state->control_server_idle_reconnect_ms > 0
|
||||
&& client_state.server_idle_ms >= (uint32_t) state->control_server_idle_reconnect_ms
|
||||
) {
|
||||
snprintf(
|
||||
reconnect_reason,
|
||||
sizeof(reconnect_reason),
|
||||
"control session stale: server idle timeout (%u ms >= %d ms)",
|
||||
client_state.server_idle_ms,
|
||||
state->control_server_idle_reconnect_ms
|
||||
);
|
||||
} else if (control_server_error_requires_reconnect(client_state.last_server_error)) {
|
||||
snprintf(
|
||||
reconnect_reason,
|
||||
sizeof(reconnect_reason),
|
||||
"control session stale: server error %.180s",
|
||||
client_state.last_server_error
|
||||
);
|
||||
} else {
|
||||
reconnect_reason[0] = '\0';
|
||||
}
|
||||
if (reconnect_reason[0] != '\0') {
|
||||
control_bridge_set_error(&state->control_stats, reconnect_reason);
|
||||
control_bridge_set_reconnect_reason(&state->control_stats, reconnect_reason);
|
||||
fprintf(stderr, "[b_side_omnid] %s\n", reconnect_reason);
|
||||
reconnect_immediately = 1;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (rc != 0) {
|
||||
memset(&client_state, 0, sizeof(client_state));
|
||||
kcp_client_state_snapshot(client, &client_state);
|
||||
pthread_mutex_lock(&state->control_stats.mutex);
|
||||
state->control_stats.registered = client_state.registered;
|
||||
state->control_stats.server_idle_ms = client_state.server_idle_ms;
|
||||
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
|
||||
pthread_mutex_unlock(&state->control_stats.mutex);
|
||||
if (client_state.last_server_error[0] != '\0') {
|
||||
control_bridge_set_error(&state->control_stats, client_state.last_server_error);
|
||||
if (control_server_error_requires_reconnect(client_state.last_server_error)) {
|
||||
control_bridge_set_reconnect_reason(&state->control_stats, client_state.last_server_error);
|
||||
reconnect_immediately = 1;
|
||||
}
|
||||
} else {
|
||||
control_bridge_set_errno_error(&state->control_stats, "control receive loop stopped");
|
||||
}
|
||||
@@ -323,7 +429,20 @@ static void *control_thread_main(void *arg) {
|
||||
}
|
||||
|
||||
if (msg.type == MSG_TYPE_ERROR && strcmp(msg.from, SERVER_PEER_ID) == 0) {
|
||||
control_bridge_set_error(&state->control_stats, (const char *) msg.body);
|
||||
char server_error[256];
|
||||
|
||||
control_message_body_to_cstr(&msg, server_error, sizeof(server_error));
|
||||
control_bridge_set_error(&state->control_stats, server_error);
|
||||
if (control_server_error_requires_reconnect(server_error)) {
|
||||
char reconnect_reason[256];
|
||||
|
||||
snprintf(reconnect_reason, sizeof(reconnect_reason), "control session stale: server error %.180s", server_error);
|
||||
control_bridge_set_reconnect_reason(&state->control_stats, reconnect_reason);
|
||||
fprintf(stderr, "[b_side_omnid] %s\n", reconnect_reason);
|
||||
reconnect_immediately = 1;
|
||||
protocol_message_clear(&msg);
|
||||
break;
|
||||
}
|
||||
protocol_message_clear(&msg);
|
||||
continue;
|
||||
}
|
||||
@@ -351,8 +470,12 @@ static void *control_thread_main(void *arg) {
|
||||
recovered = unix_dgram_client_send(&state->unix_client, msg.body, msg.body_len) == 0;
|
||||
}
|
||||
if (recovered) {
|
||||
memset(&client_state, 0, sizeof(client_state));
|
||||
kcp_client_state_snapshot(client, &client_state);
|
||||
pthread_mutex_lock(&state->control_stats.mutex);
|
||||
state->control_stats.packets_forwarded += 1;
|
||||
state->control_stats.registered = client_state.registered;
|
||||
state->control_stats.server_idle_ms = client_state.server_idle_ms;
|
||||
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
|
||||
pthread_mutex_unlock(&state->control_stats.mutex);
|
||||
protocol_message_clear(&msg);
|
||||
@@ -367,8 +490,12 @@ static void *control_thread_main(void *arg) {
|
||||
continue;
|
||||
}
|
||||
|
||||
memset(&client_state, 0, sizeof(client_state));
|
||||
kcp_client_state_snapshot(client, &client_state);
|
||||
pthread_mutex_lock(&state->control_stats.mutex);
|
||||
state->control_stats.packets_forwarded += 1;
|
||||
state->control_stats.registered = client_state.registered;
|
||||
state->control_stats.server_idle_ms = client_state.server_idle_ms;
|
||||
kcp_client_runtime_stats_snapshot(client, &state->control_stats.transport);
|
||||
pthread_mutex_unlock(&state->control_stats.mutex);
|
||||
protocol_message_clear(&msg);
|
||||
@@ -376,10 +503,11 @@ static void *control_thread_main(void *arg) {
|
||||
|
||||
pthread_mutex_lock(&state->control_stats.mutex);
|
||||
state->control_stats.registered = 0;
|
||||
state->control_stats.server_idle_ms = 0;
|
||||
pthread_mutex_unlock(&state->control_stats.mutex);
|
||||
kcp_client_close(client);
|
||||
kcp_client_free(client);
|
||||
if (!*state->stop_requested) {
|
||||
if (!*state->stop_requested && !reconnect_immediately) {
|
||||
sleep(1);
|
||||
}
|
||||
}
|
||||
@@ -398,17 +526,23 @@ static void print_stats(daemon_state_t *state) {
|
||||
|
||||
fprintf(
|
||||
stderr,
|
||||
"[b_side_omnid] video registered=%d frames=%llu bytes=%llu srtt=%dms | control registered=%d reconnects=%llu forwarded=%llu invalid=%llu unix_err=%llu srtt=%dms\n",
|
||||
"[b_side_omnid] video registered=%d frames=%llu bytes=%llu drops=%llu resets=%llu backlog=%u reason=%s srtt=%dms | control registered=%d idle=%ums reconnects=%llu forwarded=%llu invalid=%llu unix_err=%llu srtt=%dms last_reconnect=%s\n",
|
||||
video_stats.connected,
|
||||
(unsigned long long) video_stats.frames_sent,
|
||||
(unsigned long long) video_stats.bytes_sent,
|
||||
(unsigned long long) video_stats.backpressure_drops,
|
||||
(unsigned long long) video_stats.backlog_resets,
|
||||
video_stats.last_backlog_segments,
|
||||
video_stats.last_backlog_reason[0] == '\0' ? "-" : video_stats.last_backlog_reason,
|
||||
video_stats.transport.srtt_ms,
|
||||
control_stats.registered,
|
||||
control_stats.server_idle_ms,
|
||||
(unsigned long long) control_stats.reconnect_count,
|
||||
(unsigned long long) control_stats.packets_forwarded,
|
||||
(unsigned long long) control_stats.invalid_packets,
|
||||
(unsigned long long) control_stats.unix_send_errors,
|
||||
control_stats.transport.srtt_ms
|
||||
control_stats.transport.srtt_ms,
|
||||
control_stats.last_reconnect_reason[0] == '\0' ? "-" : control_stats.last_reconnect_reason
|
||||
);
|
||||
}
|
||||
|
||||
@@ -429,6 +563,10 @@ int main(void) {
|
||||
state.control_peer_id = env_or_default("OMNI_CONTROL_PEER_ID", CONTROL_DEFAULT_PEER_ID);
|
||||
state.control_expected_sender = env_or_default("OMNI_CONTROL_EXPECTED_SENDER", CONTROL_DEFAULT_EXPECTED_SENDER);
|
||||
state.control_unix_socket = env_or_default("OMNI_CONTROL_UNIX_SOCKET_PATH", CONTROL_DEFAULT_UNIX_SOCKET);
|
||||
state.control_server_idle_reconnect_ms = env_int_or_default(
|
||||
"OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS",
|
||||
CONTROL_DEFAULT_SERVER_IDLE_RECONNECT_MS
|
||||
);
|
||||
|
||||
if (state.video_config.server_addr == NULL || state.video_config.server_addr[0] == '\0' ||
|
||||
state.control_server_addr == NULL || state.control_server_addr[0] == '\0') {
|
||||
|
||||
Reference in New Issue
Block a user