diff --git a/scripts/run-kcp-batch-test.sh b/scripts/run-kcp-batch-test.sh index 676169b..29c29d7 100755 --- a/scripts/run-kcp-batch-test.sh +++ b/scripts/run-kcp-batch-test.sh @@ -6,13 +6,19 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" repo_dir="$(cd "$script_dir/.." && pwd)" script_name="$(basename "$0")" +run_mode="direct" server_ssh="" peerb_ssh="" +relay_ssh="" server_addr="" +relay_addr="" +relay_remote="" log_prefix="" listen_addr="0.0.0.0:10909" +relay_listen_addr="0.0.0.0:10909" server_workdir="$repo_dir" peerb_workdir="$repo_dir" +relay_workdir="$repo_dir" local_workdir="$repo_dir" ready_timeout=60 send_interval=1 @@ -22,24 +28,37 @@ repeat_count=1 declare -a peerb_files=() server_started=0 +relay_started=0 peer_b_started=0 peer_a_pid="" usage() { printf 'Usage:\n' - printf ' %s --server-ssh --peerb-ssh --server-addr \\\n' "$script_name" - printf ' --log-prefix --file [--file ...] [options]\n' + printf ' %s --mode --server-ssh --peerb-ssh \\\n' "$script_name" + printf ' --server-addr --log-prefix --file [options]\n' + printf '\n' + printf 'Modes:\n' + printf ' direct peer-a -> hub(server) <- peer-b (default)\n' + printf ' relay peer-a -> relay(C) -> hub(D) <- peer-b\n' printf '\n' printf 'Required arguments:\n' - printf ' --server-ssh SSH target for the server machine\n' + printf ' --server-ssh SSH target for the hub server machine\n' printf ' --peerb-ssh SSH target for the peer-b machine\n' - printf ' --server-addr Server address used by peer-a and peer-b\n' + printf ' --server-addr Hub server IP (combined with listen port for peers)\n' printf ' --log-prefix Log directory prefix; logs go under logs/\n' printf ' --file Existing file path on peer-b; repeat for multiple files\n' printf '\n' + printf 'Relay mode arguments (required when --mode=relay):\n' + printf ' --relay-ssh SSH target for the relay server machine\n' + printf ' --relay-addr Relay server IP (combined with relay listen port for peer-a)\n' + printf ' --relay-remote Hub address from relay perspective (relay -relay-remote)\n' + printf '\n' printf 'Options:\n' - printf ' --listen-addr Server listen address (default: %s)\n' "$listen_addr" - printf ' --server-workdir Server-side workdir (default: %s)\n' "$server_workdir" + printf ' --mode Run mode (default: %s)\n' "$run_mode" + printf ' --listen-addr Hub server listen address (default: %s)\n' "$listen_addr" + printf ' --relay-listen-addr Relay server listen address (default: %s)\n' "$relay_listen_addr" + printf ' --server-workdir Hub server-side workdir (default: %s)\n' "$server_workdir" + printf ' --relay-workdir Relay server-side workdir (default: %s)\n' "$relay_workdir" printf ' --peerb-workdir Peer-b-side workdir (default: %s)\n' "$peerb_workdir" printf ' --local-workdir Local peer-a workdir (default: %s)\n' "$local_workdir" printf ' --ready-timeout Startup wait timeout (default: %s)\n' "$ready_timeout" @@ -48,15 +67,28 @@ usage() { printf ' --drain-wait Wait after the last file before quit (default: %s)\n' "$drain_wait" printf ' -h, --help Show this help\n' printf '\n' - printf 'Example:\n' + printf 'Example (direct mode):\n' printf ' %s \\\n' "$script_name" + printf ' --mode direct \\\n' printf ' --server-ssh root@server-host \\\n' printf ' --peerb-ssh root@peer-b-host \\\n' - printf ' --server-addr 203.0.113.10:10909 \\\n' + printf ' --server-addr 203.0.113.10 \\\n' printf ' --log-prefix case01- \\\n' printf ' --repeat 30 \\\n' - printf ' --file /tmp/test125.bin \\\n' - printf ' --file /tmp/test5.bin\n' + printf ' --file /tmp/test125.bin\n' + printf '\n' + printf 'Example (relay mode):\n' + printf ' %s \\\n' "$script_name" + printf ' --mode relay \\\n' + printf ' --server-ssh root@hub-host \\\n' + printf ' --relay-ssh root@relay-host \\\n' + printf ' --peerb-ssh root@peer-b-host \\\n' + printf ' --server-addr 152.136.164.246 \\\n' + printf ' --relay-addr 139.199.57.110 \\\n' + printf ' --relay-remote 172.21.0.13:10909 \\\n' + printf ' --log-prefix case01- \\\n' + printf ' --repeat 30 \\\n' + printf ' --file /tmp/test125.bin\n' } log() { @@ -217,6 +249,9 @@ clean_log_directories() { remove_local_log_dir remove_remote_log_dir "$server_ssh" "$server_log_dir" "server" "$server_pid_file" remove_remote_log_dir "$peerb_ssh" "$peerb_log_dir" "peer-b" + if [[ "$run_mode" == "relay" ]]; then + remove_remote_log_dir "$relay_ssh" "$relay_log_dir" "relay" "$relay_pid_file" + fi } truncate_local_file() { @@ -260,11 +295,8 @@ reset_logs_after_probe() { } fetch_remote_peer_b_logs() { - log "copying peer-b logs from $peerb_ssh:$peerb_log_dir to $local_log_dir" - copy_remote_file_to_local "$peerb_ssh:$peerb_stdout_log" "$local_peer_b_stdout_log" + log "copying peer-b latency log from $peerb_ssh:$peerb_latency_log to $local_peer_b_latency_log" copy_remote_file_to_local "$peerb_ssh:$peerb_latency_log" "$local_peer_b_latency_log" - copy_remote_file_to_local "$peerb_ssh:$peerb_ts_debug_log" "$local_peer_b_ts_debug_log" - copy_remote_file_to_local "$peerb_ssh:$peerb_session_stats_log" "$local_peer_b_session_stats_log" } run_local_latency_summary() { @@ -308,6 +340,7 @@ start_remote_server() { local script="" script="$(cat <<'EOF' +export PATH="$PATH:/usr/local/go/bin:$HOME/go/bin" set -euo pipefail cd "$SERVER_WORKDIR" @@ -322,14 +355,14 @@ if [[ -f "$PID_FILE" ]]; then fi : > "$STDOUT_LOG" -nohup ./kcpserver \ +setsid go run ./cmd/kcpserver/ \ -listen "$LISTEN_ADDR" \ >>"$STDOUT_LOG" 2>&1 "$PID_FILE" EOF )" - log "starting remote kcpserver on $server_ssh" + log "starting remote kcpserver (hub) on $server_ssh" run_remote_script "$server_ssh" "$script" \ "SERVER_WORKDIR=$server_workdir" \ "LOG_DIR=$server_log_dir" \ @@ -341,7 +374,7 @@ EOF } wait_for_remote_server_ready() { - local pattern="kcp server listening" + local pattern="kcp hub listening" local script="" local start_time="$SECONDS" local status=0 @@ -365,16 +398,17 @@ EOF )" while (( SECONDS - start_time < ready_timeout )); do - if run_remote_script "$server_ssh" "$script" \ + status=0 + run_remote_script "$server_ssh" "$script" \ "LOG_FILE=$server_stdout_log" \ "READY_PATTERN=$pattern" \ - "PID_FILE=$server_pid_file"; then - log "remote server is ready" - return 0 - fi + "PID_FILE=$server_pid_file" || status=$? - status=$? case "$status" in + 0) + log "remote server is ready" + return 0 + ;; 10) sleep 1 ;; @@ -412,12 +446,8 @@ if [[ -z "$pid" ]]; then exit 0 fi -if ! kill -0 "$pid" 2>/dev/null; then - rm -f "$PID_FILE" - exit 0 -fi - -kill "$pid" 2>/dev/null || true +# Kill the entire process group (setsid creates a new group with pid == pgid). +kill -- -"$pid" 2>/dev/null || kill "$pid" 2>/dev/null || true for _ in 1 2 3 4 5; do if ! kill -0 "$pid" 2>/dev/null; then rm -f "$PID_FILE" @@ -426,7 +456,7 @@ for _ in 1 2 3 4 5; do sleep 1 done -kill -9 "$pid" 2>/dev/null || true +kill -9 -- -"$pid" 2>/dev/null || kill -9 "$pid" 2>/dev/null || true rm -f "$PID_FILE" EOF )" @@ -434,21 +464,158 @@ EOF run_remote_script "$server_ssh" "$script" "PID_FILE=$server_pid_file" } +start_remote_relay() { + local script="" + + script="$(cat <<'EOF' +export PATH="$PATH:/usr/local/go/bin:$HOME/go/bin" +set -euo pipefail + +cd "$RELAY_WORKDIR" +mkdir -p "$LOG_DIR" + +if [[ -f "$PID_FILE" ]]; then + existing_pid="$(<"$PID_FILE")" + if [[ -n "$existing_pid" ]] && kill -0 "$existing_pid" 2>/dev/null; then + printf >&2 'relay already running with pid %s\n' "$existing_pid" + exit 1 + fi +fi + +: > "$STDOUT_LOG" +setsid go run ./cmd/kcpserver/ \ + -mode=relay \ + -listen "$LISTEN_ADDR" \ + -relay-remote "$RELAY_REMOTE" \ + >>"$STDOUT_LOG" 2>&1 "$PID_FILE" +EOF +)" + + log "starting remote relay on $relay_ssh" + run_remote_script "$relay_ssh" "$script" \ + "RELAY_WORKDIR=$relay_workdir" \ + "LOG_DIR=$relay_log_dir" \ + "PID_FILE=$relay_pid_file" \ + "STDOUT_LOG=$relay_stdout_log" \ + "LISTEN_ADDR=$relay_listen_addr" \ + "RELAY_REMOTE=$relay_remote" + + relay_started=1 +} + +wait_for_remote_relay_ready() { + local pattern="udp relay listening" + local script="" + local start_time="$SECONDS" + local status=0 + + script="$(cat <<'EOF' +set -euo pipefail + +if [[ -f "$LOG_FILE" ]] && grep -Fq -- "$READY_PATTERN" "$LOG_FILE"; then + exit 0 +fi + +if [[ -f "$PID_FILE" ]]; then + pid="$(<"$PID_FILE")" + if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then + exit 10 + fi +fi + +exit 20 +EOF +)" + + while (( SECONDS - start_time < ready_timeout )); do + status=0 + run_remote_script "$relay_ssh" "$script" \ + "LOG_FILE=$relay_stdout_log" \ + "READY_PATTERN=$pattern" \ + "PID_FILE=$relay_pid_file" || status=$? + + case "$status" in + 0) + log "remote relay is ready" + return 0 + ;; + 10) + sleep 1 + ;; + 20) + log "remote relay exited before readiness" + dump_remote_log_head "$relay_ssh" "$relay_stdout_log" "relay" + return 1 + ;; + *) + log "remote relay readiness check failed with status $status" + dump_remote_log_head "$relay_ssh" "$relay_stdout_log" "relay" + return 1 + ;; + esac + done + + log "timed out waiting for remote relay readiness after ${ready_timeout}s" + dump_remote_log_head "$relay_ssh" "$relay_stdout_log" "relay" + return 1 +} + +stop_remote_relay() { + local script="" + + script="$(cat <<'EOF' +set -euo pipefail + +if [[ ! -f "$PID_FILE" ]]; then + exit 0 +fi + +pid="$(<"$PID_FILE")" +if [[ -z "$pid" ]]; then + rm -f "$PID_FILE" + exit 0 +fi + +kill -- -"$pid" 2>/dev/null || kill "$pid" 2>/dev/null || true +for _ in 1 2 3 4 5; do + if ! kill -0 "$pid" 2>/dev/null; then + rm -f "$PID_FILE" + exit 0 + fi + sleep 1 +done + +kill -9 -- -"$pid" 2>/dev/null || kill -9 "$pid" 2>/dev/null || true +rm -f "$PID_FILE" +EOF +)" + + run_remote_script "$relay_ssh" "$script" "PID_FILE=$relay_pid_file" +} + start_local_peer_a() { log "starting local peer-a" mkdir -p "$local_log_dir" "$local_peer_a_inbox" : > "$local_peer_a_stdout_log" + local peer_a_args=( + -id peer-a + -server "$server_connect_addr" + -inbox-dir "$local_peer_a_inbox" + -latency-log "$local_peer_a_latency_log" + -kcp-ts-debug-log "$local_peer_a_ts_debug_log" + -kcp-session-stats-log "$local_peer_a_session_stats_log" + -interactive=false + ) + + if [[ "$run_mode" == "relay" ]]; then + peer_a_args+=(-relay-via "$relay_connect_addr") + fi + ( cd "$local_workdir" - exec go run ./cmd/kcppeer \ - -id peer-a \ - -server "$server_addr" \ - -inbox-dir "$local_peer_a_inbox" \ - -latency-log "$local_peer_a_latency_log" \ - -kcp-ts-debug-log "$local_peer_a_ts_debug_log" \ - -kcp-session-stats-log "$local_peer_a_session_stats_log" \ - -interactive=false \ + exec go run ./cmd/kcppeer "${peer_a_args[@]}" \ >>"$local_peer_a_stdout_log" 2>&1 ) & @@ -456,7 +623,7 @@ start_local_peer_a() { } wait_for_local_peer_a_ready() { - local pattern="connected to $server_addr as peer-a (KCP)" + local pattern="opened KCP session as peer-a" local start_time="$SECONDS" while (( SECONDS - start_time < ready_timeout )); do @@ -498,6 +665,7 @@ start_remote_peer_b() { local script="" script="$(cat <<'EOF' +export PATH="$PATH:/usr/local/go/bin:$HOME/go/bin" set -euo pipefail cd "$PEERB_WORKDIR" @@ -515,7 +683,7 @@ fi : > "$COMMAND_FILE" peer_b_cmd="$(cat <<'INNER' - tail -n +1 -f "$COMMAND_FILE" | exec ./bin/kcppeer \ + tail -n +1 -f "$COMMAND_FILE" | exec go run ./cmd/kcppeer/ \ -id peer-b \ -server "$SERVER_ADDR" \ -inbox-dir "$INBOX_DIR" \ @@ -525,7 +693,7 @@ fi INNER )" -nohup bash -lc "$peer_b_cmd" >>"$STDOUT_LOG" 2>&1 >"$STDOUT_LOG" 2>&1 "$PID_FILE" EOF )" @@ -538,7 +706,7 @@ EOF "STDOUT_LOG=$peerb_stdout_log" \ "COMMAND_FILE=$peerb_command_file" \ "PID_FILE=$peerb_pid_file" \ - "SERVER_ADDR=$server_addr" \ + "SERVER_ADDR=$server_connect_addr" \ "LATENCY_LOG=$peerb_latency_log" \ "TS_DEBUG_LOG=$peerb_ts_debug_log" \ "SESSION_STATS_LOG=$peerb_session_stats_log" @@ -547,7 +715,7 @@ EOF } wait_for_remote_peer_b_ready() { - local pattern="connected to $server_addr as peer-b (KCP)" + local pattern="opened KCP session as peer-b" local script="" local start_time="$SECONDS" local status=0 @@ -571,16 +739,17 @@ EOF )" while (( SECONDS - start_time < ready_timeout )); do - if run_remote_script "$peerb_ssh" "$script" \ + status=0 + run_remote_script "$peerb_ssh" "$script" \ "LOG_FILE=$peerb_stdout_log" \ "READY_PATTERN=$pattern" \ - "PID_FILE=$peerb_pid_file"; then - log "remote peer-b is ready" - return 0 - fi + "PID_FILE=$peerb_pid_file" || status=$? - status=$? case "$status" in + 0) + log "remote peer-b is ready" + return 0 + ;; 10) sleep 1 ;; @@ -739,7 +908,7 @@ if kill -0 "$pid" 2>/dev/null; then fi sleep 1 done - kill "$pid" 2>/dev/null || true + kill -- -"$pid" 2>/dev/null || kill "$pid" 2>/dev/null || true for _ in 1 2 3 4 5; do if ! kill -0 "$pid" 2>/dev/null; then rm -f "$PID_FILE" "$COMMAND_FILE" @@ -747,7 +916,7 @@ if kill -0 "$pid" 2>/dev/null; then fi sleep 1 done - kill -9 "$pid" 2>/dev/null || true + kill -9 -- -"$pid" 2>/dev/null || kill -9 "$pid" 2>/dev/null || true fi rm -f "$PID_FILE" "$COMMAND_FILE" @@ -774,6 +943,11 @@ cleanup() { stop_remote_peer_b || true fi + if (( relay_started == 1 )); then + log "stopping remote relay on $relay_ssh" + stop_remote_relay || true + fi + if (( server_started == 1 )); then log "stopping remote server on $server_ssh" stop_remote_server || true @@ -794,6 +968,11 @@ handle_terminate() { while [[ $# -gt 0 ]]; do case "$1" in + --mode) + [[ $# -ge 2 ]] || die "--mode requires a value" + run_mode="$2" + shift 2 + ;; --server-ssh) [[ $# -ge 2 ]] || die "--server-ssh requires a value" server_ssh="$2" @@ -804,11 +983,26 @@ while [[ $# -gt 0 ]]; do peerb_ssh="$2" shift 2 ;; + --relay-ssh) + [[ $# -ge 2 ]] || die "--relay-ssh requires a value" + relay_ssh="$2" + shift 2 + ;; --server-addr) [[ $# -ge 2 ]] || die "--server-addr requires a value" server_addr="$2" shift 2 ;; + --relay-addr) + [[ $# -ge 2 ]] || die "--relay-addr requires a value" + relay_addr="$2" + shift 2 + ;; + --relay-remote) + [[ $# -ge 2 ]] || die "--relay-remote requires a value" + relay_remote="$2" + shift 2 + ;; --log-prefix) [[ $# -ge 2 ]] || die "--log-prefix requires a value" log_prefix="$2" @@ -819,11 +1013,21 @@ while [[ $# -gt 0 ]]; do listen_addr="$2" shift 2 ;; + --relay-listen-addr) + [[ $# -ge 2 ]] || die "--relay-listen-addr requires a value" + relay_listen_addr="$2" + shift 2 + ;; --server-workdir) [[ $# -ge 2 ]] || die "--server-workdir requires a value" server_workdir="$2" shift 2 ;; + --relay-workdir) + [[ $# -ge 2 ]] || die "--relay-workdir requires a value" + relay_workdir="$2" + shift 2 + ;; --peerb-workdir) [[ $# -ge 2 ]] || die "--peerb-workdir requires a value" peerb_workdir="$2" @@ -869,12 +1073,19 @@ while [[ $# -gt 0 ]]; do esac done +[[ "$run_mode" == "direct" || "$run_mode" == "relay" ]] || die "--mode must be 'direct' or 'relay', got: $run_mode" [[ -n "$server_ssh" ]] || die "--server-ssh is required" [[ -n "$peerb_ssh" ]] || die "--peerb-ssh is required" [[ -n "$server_addr" ]] || die "--server-addr is required" [[ -n "$log_prefix" ]] || die "--log-prefix is required" (( ${#peerb_files[@]} > 0 )) || die "at least one --file is required" +if [[ "$run_mode" == "relay" ]]; then + [[ -n "$relay_ssh" ]] || die "--relay-ssh is required in relay mode" + [[ -n "$relay_addr" ]] || die "--relay-addr is required in relay mode" + [[ -n "$relay_remote" ]] || die "--relay-remote is required in relay mode" +fi + validate_positive_integer "--ready-timeout" "$ready_timeout" validate_positive_integer "--repeat" "$repeat_count" validate_sleep_value "--send-interval" "$send_interval" @@ -882,6 +1093,16 @@ validate_sleep_value "--drain-wait" "$drain_wait" check_local_dependencies +# Extract ports and build peer connection addresses. +server_port="${listen_addr##*:}" +server_connect_addr="${server_addr}:${server_port}" + +relay_connect_addr="" +if [[ "$run_mode" == "relay" ]]; then + relay_port="${relay_listen_addr##*:}" + relay_connect_addr="${relay_addr}:${relay_port}" +fi + log_dir_name="${log_prefix}logs" inbox_dir_name="${log_prefix}inbox" @@ -902,6 +1123,15 @@ server_log_dir="$(join_path "$server_workdir" "$log_dir_name")" server_pid_file="$(join_path "$server_log_dir" "server.pid")" server_stdout_log="$(join_path "$server_log_dir" "server.stdout.log")" +relay_log_dir="" +relay_pid_file="" +relay_stdout_log="" +if [[ "$run_mode" == "relay" ]]; then + relay_log_dir="$(join_path "$relay_workdir" "$log_dir_name")" + relay_pid_file="$(join_path "$relay_log_dir" "relay.pid")" + relay_stdout_log="$(join_path "$relay_log_dir" "relay.stdout.log")" +fi + peerb_log_dir="$(join_path "$peerb_workdir" "$log_dir_name")" peerb_inbox_dir="$(join_path "$peerb_workdir" "$inbox_dir_name/peer-b")" peerb_stdout_log="$(join_path "$peerb_log_dir" "peer-b.stdout.log")" @@ -919,13 +1149,23 @@ clean_log_directories mkdir -p "$local_log_dir" "$local_peer_a_inbox" +log "run mode: $run_mode" log "local peer-a logs: $local_log_dir" log "remote server logs: $server_log_dir" +if [[ "$run_mode" == "relay" ]]; then + log "remote relay logs: $relay_log_dir" +fi log "remote peer-b logs: $peerb_log_dir" check_remote_peerb_files start_remote_server wait_for_remote_server_ready + +if [[ "$run_mode" == "relay" ]]; then + start_remote_relay + wait_for_remote_relay_ready +fi + start_local_peer_a start_remote_peer_b wait_for_local_peer_a_ready @@ -940,6 +1180,15 @@ if [[ -n "$peer_a_pid" ]]; then stop_local_peer_a fi +if (( relay_started == 1 )); then + log "stopping remote relay on $relay_ssh after batch" + if stop_remote_relay; then + relay_started=0 + else + log "failed to stop remote relay cleanly; cleanup will retry" + fi +fi + if (( server_started == 1 )); then log "stopping remote server on $server_ssh after batch" if stop_remote_server; then