#!/usr/bin/env bash
#
# Drives one end-to-end KCP file-transfer run:
#   1. starts kcpserver (hub) on a remote host over SSH, plus a relay in
#      relay mode,
#   2. starts peer-a locally and peer-b on a remote host,
#   3. probes peer-b -> peer-a delivery, then has peer-b send every --file
#      to peer-a, --repeat times,
#   4. copies peer-b's latency log back and runs cmd/latencysummary locally.
# Run with --help for the option list.

set -euo pipefail

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
repo_dir="$(cd "$script_dir/.." && pwd)"
script_name="$(basename "$0")"

# Command-line settings and their defaults.
run_mode="direct"
server_ssh=""
peerb_ssh=""
relay_ssh=""
server_addr=""
relay_addr=""
relay_remote=""
log_prefix=""
listen_addr="0.0.0.0:10909"
relay_listen_addr="0.0.0.0:10909"
server_workdir="$repo_dir"
peerb_workdir="$repo_dir"
relay_workdir="$repo_dir"
local_workdir="$repo_dir"
ready_timeout=60
send_interval=1
drain_wait=5
repeat_count=1
declare -a peerb_files=()

# Run state consulted by cleanup() to decide what still has to be stopped.
server_started=0
relay_started=0
peer_b_started=0
peer_a_pid=""

# Print the help text to stdout. Defaults shown are the current values of the
# corresponding variables.
usage() {
  # Unquoted heredoc: "\\" prints a single backslash (shell line continuation
  # in the examples); $variables are expanded.
  cat <<EOF
Usage:
  $script_name --mode <direct|relay> --server-ssh <user@host> --peerb-ssh <user@host> \\
      --server-addr <ip> --log-prefix <prefix> --file <path> [options]

Modes:
  direct    peer-a -> hub(server) <- peer-b (default)
  relay     peer-a -> relay(C) -> hub(D) <- peer-b

Required arguments:
  --server-ssh <user@host>         SSH target for the hub server machine
  --peerb-ssh <user@host>          SSH target for the peer-b machine
  --server-addr <ip>               Hub server IP (combined with listen port for peers)
  --log-prefix <prefix>            Log directory prefix; logs go under <prefix>logs/
  --file <path>                    Existing file path on peer-b; repeat for multiple files

Relay mode arguments (required when --mode=relay):
  --relay-ssh <user@host>          SSH target for the relay server machine
  --relay-addr <ip>                Relay server IP (combined with relay listen port for peer-a)
  --relay-remote <host:port>       Hub address from relay perspective (relay -relay-remote)

Options:
  --mode <direct|relay>            Run mode (default: $run_mode)
  --listen-addr <host:port>        Hub server listen address (default: $listen_addr)
  --relay-listen-addr <host:port>  Relay server listen address (default: $relay_listen_addr)
  --server-workdir <dir>           Hub server-side workdir (default: $server_workdir)
  --relay-workdir <dir>            Relay server-side workdir (default: $relay_workdir)
  --peerb-workdir <dir>            Peer-b-side workdir (default: $peerb_workdir)
  --local-workdir <dir>            Local peer-a workdir (default: $local_workdir)
  --ready-timeout <seconds>        Startup wait timeout (default: $ready_timeout)
  --repeat <count>                 Repeat the full --file list this many rounds (default: $repeat_count)
  --send-interval <seconds>        Delay between file commands (default: $send_interval)
  --drain-wait <seconds>           Wait after the last file before quit (default: $drain_wait)
  -h, --help                       Show this help

Example (direct mode):
  $script_name \\
    --mode direct \\
    --server-ssh root@server-host \\
    --peerb-ssh root@peer-b-host \\
    --server-addr 203.0.113.10 \\
    --log-prefix case01- \\
    --repeat 30 \\
    --file /tmp/test125.bin

Example (relay mode):
  $script_name \\
    --mode relay \\
    --server-ssh root@hub-host \\
    --relay-ssh root@relay-host \\
    --peerb-ssh root@peer-b-host \\
    --server-addr 152.136.164.246 \\
    --relay-addr 139.199.57.110 \\
    --relay-remote 172.21.0.13:10909 \\
    --log-prefix case01- \\
    --repeat 30 \\
    --file /tmp/test125.bin
EOF
}

# Print a timestamped progress message to stdout.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}

# Print a timestamped error to stderr and exit with status 1.
die() {
  printf >&2 '[%s] error: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
  exit 1
}

# Print "$1/$2", dropping one trailing slash from $1 first.
join_path() {
  local base="${1%/}"
  printf '%s/%s' "$base" "$2"
}

# Store in the variable named by $1 the remaining arguments, each quoted with
# %q and joined by single spaces, so the result is safe to hand to a shell.
build_quoted_command() {
  local out_var="$1"
  shift
  local joined=""
  local part=""
  local quoted=""
  for part in "$@"; do
    printf -v quoted '%q' "$part"
    if [[ -n "$joined" ]]; then
      joined+=" "
    fi
    joined+="$quoted"
  done
  printf -v "$out_var" '%s' "$joined"
}

# Run a bash script on a remote host.
#   $1     SSH target
#   $2     script text, fed to the remote "bash -s" on stdin
#   $3...  NAME=value assignments placed in the remote script's environment
# Returns the exit status of ssh (the remote script's status on success).
run_remote_script() {
  local target="$1"
  local script="$2"
  shift 2
  local remote_cmd=""
  build_quoted_command remote_cmd env "$@" bash -s --
  ssh -T "$target" "$remote_cmd" <<<"$script"
}

# Die unless $2 is a decimal integer >= 1; $1 is the option name for the message.
validate_positive_integer() {
  local name="$1"
  local value="$2"
  if [[ ! "$value" =~ ^[1-9][0-9]*$ ]]; then
    die "$name must be a positive integer, got: $value"
  fi
}

# Die unless $2 is a plain non-negative decimal number (e.g. 1, 0.5, .5).
validate_sleep_value() {
  local name="$1"
  local value="$2"
  if [[ ! "$value" =~ ^([0-9]+([.][0-9]+)?|[.][0-9]+)$ ]]; then
    die "$name must be a non-negative number understood by sleep, got: $value"
  fi
}

# Print the first 120 lines of a local file to stderr, if the file exists.
dump_local_log_head() {
  local path="$1"
  if [[ -f "$path" ]]; then
    sed -n '1,120p' "$path" >&2 || true
  fi
}

# Print the first 120 lines of a remote log file. Best effort: SSH or remote
# failures are ignored because this only runs on an error path.
#   $1 SSH target   $2 remote log path   $3 label for the log message
dump_remote_log_head() {
  local target="$1"
  local log_file="$2"
  local label="$3"
  local script=""
  script="$(
    cat <<'EOF'
set -euo pipefail
if [[ -f "$LOG_FILE" ]]; then
  sed -n '1,120p' "$LOG_FILE"
fi
EOF
  )"
  log "showing $label log head from $target"
  run_remote_script "$target" "$script" "LOG_FILE=$log_file" || true
}

# Die unless the tools needed on the local machine are on PATH.
check_local_dependencies() {
  command -v ssh >/dev/null 2>&1 || die "ssh is required"
  command -v scp >/dev/null 2>&1 || die "scp is required"
  command -v go >/dev/null 2>&1 || die "go is required for local peer-a"
}

# scp $1 (host:path) to local path $2. The copy lands in a temp file in the
# destination directory and is renamed into place only on success, so a failed
# transfer never leaves a partial file at $2. Returns scp's status on failure.
copy_remote_file_to_local() {
  local remote_source="$1"
  local local_dest="$2"
  local local_dir=""
  local local_tmp=""
  local status=0

  local_dir="$(dirname "$local_dest")"
  mkdir -p "$local_dir"
  local_tmp="$(mktemp "$local_dir/.copy.tmp.XXXXXX")"

  scp "$remote_source" "$local_tmp" || status=$?
  if (( status != 0 )); then
    rm -f "$local_tmp"
    return "$status"
  fi
  mv -f "$local_tmp" "$local_dest"
}

# Delete the local log directory (global local_log_dir) if it exists.
remove_local_log_dir() {
  if [[ -e "$local_log_dir" ]]; then
    log "removing local log dir: $local_log_dir"
    # :? aborts instead of expanding to an empty path.
    rm -rf -- "${local_log_dir:?}"
  fi
}

# Delete a log directory on a remote host.
#   $1 SSH target   $2 remote log dir   $3 label for the log message
#   $4 optional PID file; when given and it names a live process, the remote
#      script refuses to delete anything and exits 1.
remove_remote_log_dir() {
  local target="$1"
  local log_dir="$2"
  local label="$3"
  local pid_file="${4:-}"
  local script=""
  script="$(
    cat <<'EOF'
set -euo pipefail
if [[ -n "${PID_FILE:-}" && -f "$PID_FILE" ]]; then
  existing_pid="$(<"$PID_FILE")"
  if [[ -n "$existing_pid" ]] && kill -0 "$existing_pid" 2>/dev/null; then
    printf >&2 'refusing to remove log dir while process %s is still running\n' "$existing_pid"
    exit 1
  fi
fi
rm -rf -- "${LOG_DIR:?}"
EOF
  )"
  log "removing $label log dir on $target: $log_dir"
  run_remote_script "$target" "$script" \
    "LOG_DIR=$log_dir" \
    "PID_FILE=$pid_file"
}

# Remove the log directories of every participant before a run.
clean_log_directories() {
  remove_local_log_dir
  remove_remote_log_dir "$server_ssh" "$server_log_dir" "server" "$server_pid_file"
  # The peer-b PID file lives inside its log dir, so check it before deleting,
  # exactly as for server and relay; otherwise a still-running peer-b would
  # lose its PID file and go unnoticed by start_remote_peer_b.
  remove_remote_log_dir "$peerb_ssh" "$peerb_log_dir" "peer-b" "$peerb_pid_file"
  if [[ "$run_mode" == "relay" ]]; then
    remove_remote_log_dir "$relay_ssh" "$relay_log_dir" "relay" "$relay_pid_file"
  fi
}

# Create (or empty) a local file, creating its parent directory if needed.
truncate_local_file() {
  local path="$1"
  local dir=""
  dir="$(dirname "$path")"
  mkdir -p "$dir"
  : > "$path"
}

# Create (or empty) file $2 on SSH target $1, creating its parent directory.
truncate_remote_file() {
  local target="$1"
  local path="$2"
  local script=""
  script="$(
    cat <<'EOF'
set -euo pipefail
mkdir -p "$(dirname "$FILE_PATH")"
: > "$FILE_PATH"
EOF
  )"
  run_remote_script "$target" "$script" "FILE_PATH=$path"
}

# Discard everything both peers logged during the connectivity probe so the
# logs collected afterwards only cover the batch.
reset_logs_after_probe() {
  log "resetting peer logs after connectivity probe"
  rm -f "$local_peer_a_messages_log"
  truncate_local_file "$local_peer_a_stdout_log"
  truncate_local_file "$local_peer_a_latency_log"
  truncate_local_file "$local_peer_a_ts_debug_log"
  truncate_local_file "$local_peer_a_session_stats_log"
  truncate_remote_file "$peerb_ssh" "$peerb_stdout_log"
  truncate_remote_file "$peerb_ssh" "$peerb_latency_log"
  truncate_remote_file "$peerb_ssh" "$peerb_ts_debug_log"
  truncate_remote_file "$peerb_ssh" "$peerb_session_stats_log"
}

# Copy peer-b's latency log from the remote host into the local log dir.
fetch_remote_peer_b_logs() {
  log "copying peer-b latency log from $peerb_ssh:$peerb_latency_log to $local_peer_b_latency_log"
  copy_remote_file_to_local "$peerb_ssh:$peerb_latency_log" "$local_peer_b_latency_log"
}

# Run cmd/latencysummary over both peers' latency logs; dies if either log is
# missing.
run_local_latency_summary() {
  [[ -f "$local_peer_a_latency_log" ]] || die "local peer-a latency log not found: $local_peer_a_latency_log"
  [[ -f "$local_peer_b_latency_log" ]] || die "local peer-b latency log not found: $local_peer_b_latency_log"
  log "generating local latency summary: $local_kcp_latency_summary_log"
  (
    cd "$repo_dir"
    exec go run ./cmd/latencysummary \
      -input "$local_peer_a_latency_log" \
      -input "$local_peer_b_latency_log" \
      -output "$local_kcp_latency_summary_log"
  )
}

# Verify that every --file exists on the peer-b host. Paths are resolved from
# the peer-b workdir; the first missing file fails the run.
check_remote_peerb_files() {
  local script=""
  local file=""
  script="$(
    cat <<'EOF'
set -euo pipefail
cd "$PEERB_WORKDIR"
if [[ ! -f "$FILE_PATH" ]]; then
  printf >&2 'peer-b file not found: %s\n' "$FILE_PATH"
  exit 1
fi
EOF
  )"
  for file in "${peerb_files[@]}"; do
    log "checking peer-b file exists: $file"
    run_remote_script "$peerb_ssh" "$script" \
      "PEERB_WORKDIR=$peerb_workdir" \
      "FILE_PATH=$file"
  done
}

# Poll a remote process until its log contains a readiness marker.
#   $1 SSH target   $2 label used in log messages   $3 remote stdout log
#   $4 remote PID file   $5 fixed string that signals readiness
# Remote probe exit codes: 0 = marker found, 10 = process alive but not ready
# yet, 20 = no live process. Returns 0 when ready; returns 1 (after dumping the
# log head) on process exit, probe failure, or after ready_timeout seconds.
wait_for_remote_ready() {
  local target="$1"
  local label="$2"
  local log_file="$3"
  local pid_file="$4"
  local pattern="$5"
  local script=""
  local start_time="$SECONDS"
  local status=0

  script="$(
    cat <<'EOF'
set -euo pipefail
if [[ -f "$LOG_FILE" ]] && grep -Fq -- "$READY_PATTERN" "$LOG_FILE"; then
  exit 0
fi
if [[ -f "$PID_FILE" ]]; then
  pid="$(<"$PID_FILE")"
  if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
    exit 10
  fi
fi
exit 20
EOF
  )"

  while (( SECONDS - start_time < ready_timeout )); do
    status=0
    run_remote_script "$target" "$script" \
      "LOG_FILE=$log_file" \
      "READY_PATTERN=$pattern" \
      "PID_FILE=$pid_file" || status=$?
    case "$status" in
      0)
        log "remote $label is ready"
        return 0
        ;;
      10)
        sleep 1
        ;;
      20)
        log "remote $label exited before readiness"
        dump_remote_log_head "$target" "$log_file" "$label"
        return 1
        ;;
      *)
        log "remote $label readiness check failed with status $status"
        dump_remote_log_head "$target" "$log_file" "$label"
        return 1
        ;;
    esac
  done

  log "timed out waiting for remote $label readiness after ${ready_timeout}s"
  dump_remote_log_head "$target" "$log_file" "$label"
  return 1
}

# Stop the remote process recorded in a PID file and remove the file.
#   $1 SSH target   $2 remote PID file
# Sends TERM, waits up to ~5s, then sends KILL. A missing or empty PID file
# counts as already stopped.
stop_remote_process_group() {
  local target="$1"
  local pid_file="$2"
  local script=""
  script="$(
    cat <<'EOF'
set -euo pipefail
if [[ ! -f "$PID_FILE" ]]; then
  exit 0
fi
pid="$(<"$PID_FILE")"
if [[ -z "$pid" ]]; then
  rm -f "$PID_FILE"
  exit 0
fi
# Kill the entire process group (setsid creates a new group with pid == pgid).
kill -- -"$pid" 2>/dev/null || kill "$pid" 2>/dev/null || true
for _ in 1 2 3 4 5; do
  if ! kill -0 "$pid" 2>/dev/null; then
    rm -f "$PID_FILE"
    exit 0
  fi
  sleep 1
done
kill -9 -- -"$pid" 2>/dev/null || kill -9 "$pid" 2>/dev/null || true
rm -f "$PID_FILE"
EOF
  )"
  run_remote_script "$target" "$script" "PID_FILE=$pid_file"
}

# Launch kcpserver (hub) on the server host in the background and record its
# PID. Fails if the PID file already names a live process.
start_remote_server() {
  local script=""
  script="$(
    cat <<'EOF'
export PATH="$PATH:/usr/local/go/bin:$HOME/go/bin"
set -euo pipefail
cd "$SERVER_WORKDIR"
mkdir -p "$LOG_DIR"
if [[ -f "$PID_FILE" ]]; then
  existing_pid="$(<"$PID_FILE")"
  if [[ -n "$existing_pid" ]] && kill -0 "$existing_pid" 2>/dev/null; then
    printf >&2 'server already running with pid %s\n' "$existing_pid"
    exit 1
  fi
fi
: > "$STDOUT_LOG"
# Detach completely (own session, no inherited stdio) so the ssh session can
# end while the server keeps running, then record the PID for later checks.
setsid go run ./cmd/kcpserver/ \
  -listen "$LISTEN_ADDR" \
  >>"$STDOUT_LOG" 2>&1 </dev/null &
printf '%s\n' "$!" > "$PID_FILE"
EOF
  )"
  log "starting remote kcpserver (hub) on $server_ssh"
  run_remote_script "$server_ssh" "$script" \
    "SERVER_WORKDIR=$server_workdir" \
    "LOG_DIR=$server_log_dir" \
    "PID_FILE=$server_pid_file" \
    "STDOUT_LOG=$server_stdout_log" \
    "LISTEN_ADDR=$listen_addr"
  server_started=1
}

# Wait until the hub's log reports that it is listening.
wait_for_remote_server_ready() {
  wait_for_remote_ready "$server_ssh" "server" \
    "$server_stdout_log" "$server_pid_file" "kcp hub listening"
}

# Stop the remote hub server.
stop_remote_server() {
  stop_remote_process_group "$server_ssh" "$server_pid_file"
}

# Launch kcpserver in relay mode on the relay host in the background and
# record its PID. Fails if the PID file already names a live process.
start_remote_relay() {
  local script=""
  script="$(
    cat <<'EOF'
export PATH="$PATH:/usr/local/go/bin:$HOME/go/bin"
set -euo pipefail
cd "$RELAY_WORKDIR"
mkdir -p "$LOG_DIR"
if [[ -f "$PID_FILE" ]]; then
  existing_pid="$(<"$PID_FILE")"
  if [[ -n "$existing_pid" ]] && kill -0 "$existing_pid" 2>/dev/null; then
    printf >&2 'relay already running with pid %s\n' "$existing_pid"
    exit 1
  fi
fi
: > "$STDOUT_LOG"
# Detached background start; see the PID file for the process to stop later.
setsid go run ./cmd/kcpserver/ \
  -mode=relay \
  -listen "$LISTEN_ADDR" \
  -relay-remote "$RELAY_REMOTE" \
  >>"$STDOUT_LOG" 2>&1 </dev/null &
printf '%s\n' "$!" > "$PID_FILE"
EOF
  )"
  log "starting remote relay on $relay_ssh"
  run_remote_script "$relay_ssh" "$script" \
    "RELAY_WORKDIR=$relay_workdir" \
    "LOG_DIR=$relay_log_dir" \
    "PID_FILE=$relay_pid_file" \
    "STDOUT_LOG=$relay_stdout_log" \
    "LISTEN_ADDR=$relay_listen_addr" \
    "RELAY_REMOTE=$relay_remote"
  relay_started=1
}

# Wait until the relay's log reports that it is listening.
wait_for_remote_relay_ready() {
  wait_for_remote_ready "$relay_ssh" "relay" \
    "$relay_stdout_log" "$relay_pid_file" "udp relay listening"
}

# Stop the remote relay.
stop_remote_relay() {
  stop_remote_process_group "$relay_ssh" "$relay_pid_file"
}

# Start peer-a on this machine in the background; its PID goes to peer_a_pid
# and its output to the peer-a stdout log.
start_local_peer_a() {
  log "starting local peer-a"
  mkdir -p "$local_log_dir" "$local_peer_a_inbox"
  : > "$local_peer_a_stdout_log"

  local peer_a_args=(
    -id peer-a
    -server "$server_connect_addr"
    -inbox-dir "$local_peer_a_inbox"
    -latency-log "$local_peer_a_latency_log"
    -kcp-ts-debug-log "$local_peer_a_ts_debug_log"
    -kcp-session-stats-log "$local_peer_a_session_stats_log"
    -interactive=false
  )
  if [[ "$run_mode" == "relay" ]]; then
    peer_a_args+=(-relay-via "$relay_connect_addr")
  fi

  (
    cd "$local_workdir"
    exec go run ./cmd/kcppeer "${peer_a_args[@]}" \
      >>"$local_peer_a_stdout_log" 2>&1
  ) &
  peer_a_pid="$!"
}

# Wait until peer-a's stdout log shows an opened KCP session. Returns 1 (after
# dumping the log head) if peer-a exits first or ready_timeout elapses.
wait_for_local_peer_a_ready() {
  local pattern="opened KCP session as peer-a"
  local start_time="$SECONDS"

  while (( SECONDS - start_time < ready_timeout )); do
    if [[ -f "$local_peer_a_stdout_log" ]] \
      && grep -Fq -- "$pattern" "$local_peer_a_stdout_log"; then
      log "local peer-a is ready"
      return 0
    fi
    if [[ -n "$peer_a_pid" ]] && ! kill -0 "$peer_a_pid" 2>/dev/null; then
      log "local peer-a exited before readiness"
      dump_local_log_head "$local_peer_a_stdout_log"
      return 1
    fi
    sleep 1
  done

  log "timed out waiting for local peer-a readiness after ${ready_timeout}s"
  dump_local_log_head "$local_peer_a_stdout_log"
  return 1
}

# Terminate peer-a if it is still running, reap it, and clear peer_a_pid.
stop_local_peer_a() {
  if [[ -z "$peer_a_pid" ]]; then
    return 0
  fi
  if kill -0 "$peer_a_pid" 2>/dev/null; then
    kill "$peer_a_pid" 2>/dev/null || true
  fi
  # Reap in either case; the exit status of a stopped peer is not interesting.
  wait "$peer_a_pid" 2>/dev/null || true
  peer_a_pid=""
}

# Start peer-b on the remote host in the background. Its stdin is a
# "tail -f" of the command file, so later steps drive it by appending lines
# to that file. Fails if the PID file already names a live process.
start_remote_peer_b() {
  local script=""
  script="$(
    cat <<'EOF'
export PATH="$PATH:/usr/local/go/bin:$HOME/go/bin"
set -euo pipefail
cd "$PEERB_WORKDIR"
mkdir -p "$LOG_DIR" "$INBOX_DIR"
if [[ -f "$PID_FILE" ]]; then
  existing_pid="$(<"$PID_FILE")"
  if [[ -n "$existing_pid" ]] && kill -0 "$existing_pid" 2>/dev/null; then
    printf >&2 'peer-b already running with pid %s\n' "$existing_pid"
    exit 1
  fi
fi
: > "$STDOUT_LOG"
: > "$COMMAND_FILE"
peer_b_cmd="$(cat <<'INNER'
tail -n +1 -f "$COMMAND_FILE" | exec go run ./cmd/kcppeer/ \
  -id peer-b \
  -server "$SERVER_ADDR" \
  -inbox-dir "$INBOX_DIR" \
  -latency-log "$LATENCY_LOG" \
  -kcp-ts-debug-log "$TS_DEBUG_LOG" \
  -kcp-session-stats-log "$SESSION_STATS_LOG"
INNER
)"
# Detached background start; the variables used by peer_b_cmd reach the inner
# shell through the environment.
nohup setsid bash -lc "$peer_b_cmd" >>"$STDOUT_LOG" 2>&1 </dev/null &
printf '%s\n' "$!" > "$PID_FILE"
EOF
  )"
  log "starting remote peer-b on $peerb_ssh"
  run_remote_script "$peerb_ssh" "$script" \
    "PEERB_WORKDIR=$peerb_workdir" \
    "LOG_DIR=$peerb_log_dir" \
    "INBOX_DIR=$peerb_inbox_dir" \
    "STDOUT_LOG=$peerb_stdout_log" \
    "COMMAND_FILE=$peerb_command_file" \
    "PID_FILE=$peerb_pid_file" \
    "SERVER_ADDR=$server_connect_addr" \
    "LATENCY_LOG=$peerb_latency_log" \
    "TS_DEBUG_LOG=$peerb_ts_debug_log" \
    "SESSION_STATS_LOG=$peerb_session_stats_log"
  peer_b_started=1
}

# Wait until peer-b's log shows an opened KCP session.
wait_for_remote_peer_b_ready() {
  wait_for_remote_ready "$peerb_ssh" "peer-b" \
    "$peerb_stdout_log" "$peerb_pid_file" "opened KCP session as peer-b"
}

# Have peer-b send a uniquely marked text message to peer-a and wait for the
# marker to show up in peer-a's messages log. On success the probe's log
# output is discarded. Returns 1 if peer-a exits or ready_timeout elapses.
probe_peer_b_to_local_peer_a() {
  local marker=""
  local command_line=""
  local quoted_command=""
  local script=""
  local start_time="$SECONDS"

  marker="probe-$(date +%s)-$$"
  printf -v command_line 'text peer-a %s' "$marker"
  printf -v quoted_command '%q' "$command_line"
  # Unquoted heredoc: the %q-quoted command is expanded locally, while \$ keeps
  # COMMAND_FILE for the remote shell to expand.
  script="$(
    cat <<EOF
set -euo pipefail
printf '%s\n' ${quoted_command} >> "\$COMMAND_FILE"
EOF
  )"

  log "probing peer-b -> peer-a message delivery before batch"
  run_remote_script "$peerb_ssh" "$script" "COMMAND_FILE=$peerb_command_file"

  while (( SECONDS - start_time < ready_timeout )); do
    if [[ -f "$local_peer_a_messages_log" ]] \
      && grep -Fq -- "$marker" "$local_peer_a_messages_log"; then
      log "peer-b -> peer-a probe succeeded"
      reset_logs_after_probe
      return 0
    fi
    if [[ -n "$peer_a_pid" ]] && ! kill -0 "$peer_a_pid" 2>/dev/null; then
      log "local peer-a exited during connectivity probe"
      dump_local_log_head "$local_peer_a_stdout_log"
      return 1
    fi
    sleep 1
  done

  log "timed out waiting for peer-b -> peer-a probe delivery after ${ready_timeout}s"
  dump_local_log_head "$local_peer_a_stdout_log"
  dump_remote_log_head "$peerb_ssh" "$peerb_stdout_log" "peer-b"
  return 1
}

# Run the whole send batch in a single SSH session: append one
# "file peer-a <path>" command per file per round to peer-b's command file
# (send_interval apart), wait drain_wait, append "quit", then wait up to
# ready_timeout seconds for peer-b to exit. Clears peer_b_started on success.
run_remote_peer_b_batch() {
  local script=""
  local batch_commands=""
  local round=0
  local i=0
  local send_index=0
  local total_sends=$(( ${#peerb_files[@]} * repeat_count ))
  local file=""
  local command_line=""
  local quoted_command=""
  local quoted_sleep=""

  for (( round = 1; round <= repeat_count; round++ )); do
    for (( i = 0; i < ${#peerb_files[@]}; i++ )); do
      file="${peerb_files[$i]}"
      send_index=$(( send_index + 1 ))
      log "queueing peer-b -> peer-a file (round $round/$repeat_count, send $send_index/$total_sends): $file"
      printf -v command_line 'file peer-a %s' "$file"
      printf -v quoted_command '%q' "$command_line"
      batch_commands+="printf '%s\n' ${quoted_command} >> \"\$COMMAND_FILE\""$'\n'
      # No pause after the final send; drain_wait covers that.
      if (( send_index < total_sends )); then
        printf -v quoted_sleep '%q' "$send_interval"
        batch_commands+="sleep ${quoted_sleep}"$'\n'
      fi
    done
  done
  printf -v quoted_sleep '%q' "$drain_wait"
  batch_commands+="sleep ${quoted_sleep}"$'\n'
  batch_commands+="printf '%s\n' quit >> \"\$COMMAND_FILE\""$'\n'

  # Unquoted heredoc: only $batch_commands is expanded locally; every \$ is
  # left for the remote shell.
  script="$(
    cat <<EOF
set -euo pipefail
if [[ ! -f "\$PID_FILE" ]]; then
  printf >&2 'peer-b pid file not found: %s\n' "\$PID_FILE"
  exit 1
fi
pid="\$(<"\$PID_FILE")"
if [[ -z "\$pid" ]] || ! kill -0 "\$pid" 2>/dev/null; then
  printf >&2 'peer-b is not running\n'
  exit 1
fi
$batch_commands
for (( i = 0; i < READY_TIMEOUT; i++ )); do
  if ! kill -0 "\$pid" 2>/dev/null; then
    rm -f "\$PID_FILE" "\$COMMAND_FILE"
    exit 0
  fi
  sleep 1
done
printf >&2 'peer-b did not exit after quit within %s seconds\n' "\$READY_TIMEOUT"
exit 1
EOF
  )"

  log "sending ${#peerb_files[@]} files across $repeat_count rounds ($total_sends sends total) from peer-b"
  run_remote_script "$peerb_ssh" "$script" \
    "PID_FILE=$peerb_pid_file" \
    "COMMAND_FILE=$peerb_command_file" \
    "READY_TIMEOUT=$ready_timeout"
  peer_b_started=0
}

# Stop peer-b, escalating: ask it to "quit" via the command file, then TERM,
# then KILL, allowing ~5s between steps. Removes the PID and command files.
stop_remote_peer_b() {
  local script=""
  script="$(
    cat <<'EOF'
set -euo pipefail
if [[ ! -f "$PID_FILE" ]]; then
  rm -f "$COMMAND_FILE"
  exit 0
fi
pid="$(<"$PID_FILE")"
if [[ -z "$pid" ]]; then
  rm -f "$PID_FILE" "$COMMAND_FILE"
  exit 0
fi
if kill -0 "$pid" 2>/dev/null; then
  printf 'quit\n' >> "$COMMAND_FILE" 2>/dev/null || true
  for _ in 1 2 3 4 5; do
    if ! kill -0 "$pid" 2>/dev/null; then
      rm -f "$PID_FILE" "$COMMAND_FILE"
      exit 0
    fi
    sleep 1
  done
  kill -- -"$pid" 2>/dev/null || kill "$pid" 2>/dev/null || true
  for _ in 1 2 3 4 5; do
    if ! kill -0 "$pid" 2>/dev/null; then
      rm -f "$PID_FILE" "$COMMAND_FILE"
      exit 0
    fi
    sleep 1
  done
  kill -9 -- -"$pid" 2>/dev/null || kill -9 "$pid" 2>/dev/null || true
fi
rm -f "$PID_FILE" "$COMMAND_FILE"
EOF
  )"
  run_remote_script "$peerb_ssh" "$script" \
    "PID_FILE=$peerb_pid_file" \
    "COMMAND_FILE=$peerb_command_file"
}

# EXIT handler: stop whatever is still marked as running (peers first, then
# relay, then hub) and exit with the status the script was ending with.
# Remote stop failures are ignored so the remaining steps still run.
cleanup() {
  local exit_code="$?"
  # Disarm the traps so a signal during cleanup cannot re-enter it.
  trap - EXIT INT TERM
  if [[ -n "$peer_a_pid" ]]; then
    log "stopping local peer-a"
    stop_local_peer_a
  fi
  if (( peer_b_started == 1 )); then
    log "stopping remote peer-b on $peerb_ssh"
    stop_remote_peer_b || true
  fi
  if (( relay_started == 1 )); then
    log "stopping remote relay on $relay_ssh"
    stop_remote_relay || true
  fi
  if (( server_started == 1 )); then
    log "stopping remote server on $server_ssh"
    stop_remote_server || true
  fi
  exit "$exit_code"
}

# INT handler: exit with 130 (128 + SIGINT).
handle_interrupt() {
  log "received interrupt signal"
  exit 130
}

# TERM handler: exit with 143 (128 + SIGTERM).
handle_terminate() {
  log "received terminate signal"
  exit 143
}

# ---- argument parsing ------------------------------------------------------
# Every option except -h/--help takes exactly one value. The case maps the
# option to the variable that receives it; the shared tail checks that a value
# is present and stores it.
while (( $# > 0 )); do
  target_var=""
  case "$1" in
    --mode) target_var="run_mode" ;;
    --server-ssh) target_var="server_ssh" ;;
    --peerb-ssh) target_var="peerb_ssh" ;;
    --relay-ssh) target_var="relay_ssh" ;;
    --server-addr) target_var="server_addr" ;;
    --relay-addr) target_var="relay_addr" ;;
    --relay-remote) target_var="relay_remote" ;;
    --log-prefix) target_var="log_prefix" ;;
    --listen-addr) target_var="listen_addr" ;;
    --relay-listen-addr) target_var="relay_listen_addr" ;;
    --server-workdir) target_var="server_workdir" ;;
    --relay-workdir) target_var="relay_workdir" ;;
    --peerb-workdir) target_var="peerb_workdir" ;;
    --local-workdir) target_var="local_workdir" ;;
    --ready-timeout) target_var="ready_timeout" ;;
    --repeat) target_var="repeat_count" ;;
    --send-interval) target_var="send_interval" ;;
    --drain-wait) target_var="drain_wait" ;;
    --file) target_var="peerb_files" ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      die "unknown argument: $1"
      ;;
  esac

  (( $# >= 2 )) || die "$1 requires a value"
  if [[ "$target_var" == "peerb_files" ]]; then
    # --file is the only repeatable option: accumulate instead of overwrite.
    peerb_files+=("$2")
  else
    printf -v "$target_var" '%s' "$2"
  fi
  shift 2
done

# ---- argument validation ---------------------------------------------------
[[ "$run_mode" == "direct" || "$run_mode" == "relay" ]] || die "--mode must be 'direct' or 'relay', got: $run_mode"
[[ -n "$server_ssh" ]] || die "--server-ssh is required"
[[ -n "$peerb_ssh" ]] || die "--peerb-ssh is required"
[[ -n "$server_addr" ]] || die "--server-addr is required"
[[ -n "$log_prefix" ]] || die "--log-prefix is required"
(( ${#peerb_files[@]} > 0 )) || die "at least one --file is required"

if [[ "$run_mode" == "relay" ]]; then
  [[ -n "$relay_ssh" ]] || die "--relay-ssh is required in relay mode"
  [[ -n "$relay_addr" ]] || die "--relay-addr is required in relay mode"
  [[ -n "$relay_remote" ]] || die "--relay-remote is required in relay mode"
fi

validate_positive_integer "--ready-timeout" "$ready_timeout"
validate_positive_integer "--repeat" "$repeat_count"
validate_sleep_value "--send-interval" "$send_interval"
validate_sleep_value "--drain-wait" "$drain_wait"

check_local_dependencies

# Extract ports and build peer connection addresses.
server_port="${listen_addr##*:}"
# Peers dial <server-addr>:<port>, so the listen address must carry a numeric
# port; without this check a value such as "0.0.0.0" would silently become the
# "port" of the connect address.
[[ "$listen_addr" == *:* && "$server_port" =~ ^[0-9]+$ ]] \
  || die "--listen-addr must end with :<port>, got: $listen_addr"
server_connect_addr="${server_addr}:${server_port}"

relay_connect_addr=""
if [[ "$run_mode" == "relay" ]]; then
  relay_port="${relay_listen_addr##*:}"
  [[ "$relay_listen_addr" == *:* && "$relay_port" =~ ^[0-9]+$ ]] \
    || die "--relay-listen-addr must end with :<port>, got: $relay_listen_addr"
  relay_connect_addr="${relay_addr}:${relay_port}"
fi

# Directory names shared by every participant, relative to its own workdir.
log_dir_name="${log_prefix}logs"
inbox_dir_name="${log_prefix}inbox"

# Local (peer-a) paths, plus local destinations for files copied from peer-b.
local_log_dir="$(join_path "$local_workdir" "$log_dir_name")"
local_peer_a_inbox="$(join_path "$local_workdir" "$inbox_dir_name/peer-a")"
local_peer_a_messages_log="$(join_path "$local_peer_a_inbox" "messages.log")"
local_peer_a_stdout_log="$(join_path "$local_log_dir" "peer-a.stdout.log")"
local_peer_a_latency_log="$(join_path "$local_log_dir" "peer-a-kcp-latency.jsonl")"
local_peer_a_ts_debug_log="$(join_path "$local_log_dir" "peer-a-kcp-packet-debug.jsonl")"
local_peer_a_session_stats_log="$(join_path "$local_log_dir" "peer-a-kcp-session-stats.jsonl")"
local_peer_b_stdout_log="$(join_path "$local_log_dir" "peer-b.stdout.log")"
local_peer_b_latency_log="$(join_path "$local_log_dir" "peer-b-kcp-latency.jsonl")"
local_peer_b_ts_debug_log="$(join_path "$local_log_dir" "peer-b-kcp-packet-debug.jsonl")"
local_peer_b_session_stats_log="$(join_path "$local_log_dir" "peer-b-kcp-session-stats.jsonl")"
local_kcp_latency_summary_log="$(join_path "$local_log_dir" "kcp-latency-summary.jsonl")"

# Hub server paths (on the --server-ssh host).
server_log_dir="$(join_path "$server_workdir" "$log_dir_name")"
server_pid_file="$(join_path "$server_log_dir" "server.pid")"
server_stdout_log="$(join_path "$server_log_dir" "server.stdout.log")"

# Relay paths (on the --relay-ssh host); left empty in direct mode.
relay_log_dir=""
relay_pid_file=""
relay_stdout_log=""
if [[ "$run_mode" == "relay" ]]; then
  relay_log_dir="$(join_path "$relay_workdir" "$log_dir_name")"
  relay_pid_file="$(join_path "$relay_log_dir" "relay.pid")"
  relay_stdout_log="$(join_path "$relay_log_dir" "relay.stdout.log")"
fi

# Peer-b paths (on the --peerb-ssh host).
peerb_log_dir="$(join_path "$peerb_workdir" "$log_dir_name")"
peerb_inbox_dir="$(join_path "$peerb_workdir" "$inbox_dir_name/peer-b")"
peerb_stdout_log="$(join_path "$peerb_log_dir" "peer-b.stdout.log")"
peerb_latency_log="$(join_path "$peerb_log_dir" "peer-b-kcp-latency.jsonl")"
peerb_ts_debug_log="$(join_path "$peerb_log_dir" "peer-b-kcp-packet-debug.jsonl")"
peerb_session_stats_log="$(join_path "$peerb_log_dir" "peer-b-kcp-session-stats.jsonl")"
peerb_pid_file="$(join_path "$peerb_log_dir" "peer-b.pid")"
peerb_command_file="$(join_path "$peerb_log_dir" "peer-b.commands")"

# From here on every exit path goes through cleanup, which stops whatever the
# *_started flags and peer_a_pid say is still running.
trap cleanup EXIT
trap handle_interrupt INT
trap handle_terminate TERM

# Start from empty log directories on every machine.
clean_log_directories
mkdir -p "$local_log_dir" "$local_peer_a_inbox"

log "run mode: $run_mode"
log "local peer-a logs: $local_log_dir"
log "remote server logs: $server_log_dir"
if [[ "$run_mode" == "relay" ]]; then
  log "remote relay logs: $relay_log_dir"
fi
log "remote peer-b logs: $peerb_log_dir"

check_remote_peerb_files

# Bring up the hub first, then the relay, then both peers.
start_remote_server
wait_for_remote_server_ready

if [[ "$run_mode" == "relay" ]]; then
  start_remote_relay
  wait_for_remote_relay_ready
fi

# Both peers are started before either is awaited.
start_local_peer_a
start_remote_peer_b
wait_for_local_peer_a_ready
wait_for_remote_peer_b_ready

probe_peer_b_to_local_peer_a
run_remote_peer_b_batch

log "batch send completed"

# Stop everything before collecting logs. A flag is cleared only after a
# successful stop, so cleanup retries anything that failed here.
if [[ -n "$peer_a_pid" ]]; then
  log "stopping local peer-a after batch"
  stop_local_peer_a
fi

if (( relay_started == 1 )); then
  log "stopping remote relay on $relay_ssh after batch"
  if stop_remote_relay; then
    relay_started=0
  else
    log "failed to stop remote relay cleanly; cleanup will retry"
  fi
fi

if (( server_started == 1 )); then
  log "stopping remote server on $server_ssh after batch"
  if stop_remote_server; then
    server_started=0
  else
    log "failed to stop remote server cleanly; cleanup will retry"
  fi
fi

fetch_remote_peer_b_logs
run_local_latency_summary