132 lines
3.8 KiB
Bash
132 lines
3.8 KiB
Bash
#!/usr/bin/env bash
#
# Incident capture: gathers diagnostic state (metadata, status files, log
# tails, systemd/network command output) into a per-incident directory.
#
# Flags (each takes one value): --incident-id, --source, --reason, --unit,
# --result, --exit-status.

# Strict mode: stop on errors, on unset variables and on pipeline failures.
set -o errexit -o nounset -o pipefail

# Absolute directory of this script, used to locate the helper library.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
. "${SCRIPT_DIR}/common.sh"

# Step label handed to blitz_log for every message this script emits.
STEP="incident-capture"

# Values supplied through command-line flags; all start out empty.
incident_id='' incident_source='' incident_reason=''
incident_unit='' incident_result='' incident_exit_status=''

#######################################
# Run a command best-effort and store its combined stdout/stderr in a file.
# The command's exit status is deliberately ignored so that one failing
# diagnostic never stops the remaining captures.
# Globals:
#   BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC (read, optional) - time limit in
#     seconds applied through timeout(1) when that tool is installed.
# Arguments:
#   $1   - file that receives the output (overwritten)
#   $2.. - command to run, followed by its arguments
# Returns:
#   0 always
#######################################
run_capture() {
  local output_path="$1"
  shift

  # Expand with a fallback: under `set -u` a bare expansion of an unset
  # variable terminates the whole script, and `|| true` cannot prevent that.
  local timeout_sec="${BLITZ_INCIDENT_COMMAND_TIMEOUT_SEC:-}"

  if [[ -n "${timeout_sec}" ]] && command -v timeout >/dev/null 2>&1; then
    timeout "${timeout_sec}s" "$@" >"${output_path}" 2>&1 || true
  else
    # No usable limit (or no timeout binary): run the command unbounded.
    "$@" >"${output_path}" 2>&1 || true
  fi
}

#######################################
# Parse command-line flags into the incident_* globals.
# Every recognised flag takes exactly one value; the value may be empty.
# Globals:
#   STEP (read)
#   incident_id, incident_source, incident_reason, incident_unit,
#   incident_result, incident_exit_status (written)
# Arguments:
#   the script's command-line arguments
# Exits:
#   2, after logging through blitz_log, on an unknown flag or on a flag
#   that is missing its value
#######################################
parse_incident_args() {
  while (($# > 0)); do
    case "$1" in
      --incident-id | --source | --reason | --unit | --result | --exit-status)
        # `shift 2` fails when the value is absent, which under `set -e`
        # would end the script with no diagnostic; report it instead.
        if (($# < 2)); then
          blitz_log "${STEP}" "parse-arg" "failure" "missing value for argument: $1" 2
          exit 2
        fi
        ;;
      *)
        blitz_log "${STEP}" "parse-arg" "failure" "unknown argument: $1" 2
        exit 2
        ;;
    esac

    case "$1" in
      --incident-id) incident_id="$2" ;;
      --source) incident_source="$2" ;;
      --reason) incident_reason="$2" ;;
      --unit) incident_unit="$2" ;;
      --result) incident_result="$2" ;;
      --exit-status) incident_exit_status="$2" ;;
    esac
    shift 2
  done
}

parse_incident_args "$@"
# Every argument has been consumed; clear the positional parameters.
set --

# A result of "success" is not an incident: leave without capturing anything.
[[ "${incident_result}" != "success" ]] || exit 0

# Load environment and prepare directories through the blitz_* helpers
# (not defined in this file; presumably provided by common.sh).
blitz_load_boot_env
# Failure to load the run context is tolerated.
blitz_load_run_context_env || true
blitz_prepare_runtime_dir
blitz_prepare_run_root

# Generate an incident id when the caller did not pass --incident-id.
[[ -n "${incident_id}" ]] || incident_id="$(blitz_new_incident_id)"

# Each incident gets its own directory under the run root.
incident_dir="${BLITZ_RUN_ROOT}/incidents/${incident_id}"
mkdir -p "${incident_dir}"

#######################################
# Write the incident metadata as a JSON document.
# Arguments:
#   $1  - output file path
#   $2  - incident id
#   $3  - run id
#   $4  - incident source
#   $5  - fault reason
#   $6  - unit
#   $7  - service result
#   $8  - exit status
#   $9  - run directory
#   $10 - hostname
# Outputs:
#   Creates or overwrites $1 with a UTF-8, key-sorted, indented JSON object.
#   All values are stored as strings; "captured_at" is the current UTC time.
# Returns:
#   python3's exit status
#######################################
write_incident_manifest() {
  python3 - "$@" <<'PY'
import json
import sys
import time

# Ten values follow the "-" script name. Unpack the whole tail: a slice
# that stops one short ([1:10]) yields nine items and raises ValueError.
(
    path,
    incident_id,
    run_id,
    source,
    reason,
    unit,
    result,
    exit_status,
    run_dir,
    hostname,
) = sys.argv[1:]

payload = {
    "incident_id": incident_id,
    "run_id": run_id,
    "source": source,
    "fault_reason": reason,
    "unit": unit,
    "service_result": result,
    "exit_status": exit_status,
    "run_dir": run_dir,
    "hostname": hostname,
    "captured_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
}

with open(path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, ensure_ascii=False, indent=2, sort_keys=True)
PY
}

write_incident_manifest \
  "${incident_dir}/incident.json" \
  "${incident_id}" \
  "${BLITZ_RUN_ID:-}" \
  "${incident_source}" \
  "${incident_reason}" \
  "${incident_unit}" \
  "${incident_result}" \
  "${incident_exit_status}" \
  "${BLITZ_RUN_DIR:-}" \
  "${HOSTNAME:-$(hostname)}"

# Copy whichever of the known status files exist in the runtime directory.
for status_name in watchdog b-side-omnid ros-receiver; do
  status_file="${BLITZ_RUNTIME_DIR}/${status_name}.status.json"
  [[ -f "${status_file}" ]] || continue
  cp -f "${status_file}" "${incident_dir}/${status_name}.status.json"
done

# Keep the last 400 lines of the log file, when there is one.
if [[ -f "${BLITZ_LOG_FILE}" ]]; then
  tail -n 400 "${BLITZ_LOG_FILE}" >"${incident_dir}/startup.log.tail"
fi

# Services inspected by both the systemctl and the journalctl capture.
blitz_services=(
  blitz-run-context.service
  blitz-5g-dial.service
  blitz-5g-link-logger.service
  blitz-ros-receiver.service
  blitz-b-side-omnid.service
  blitz-watchdog.service
)

# journalctl takes one "-u <unit>" pair per service.
journal_unit_flags=()
for blitz_service in "${blitz_services[@]}"; do
  journal_unit_flags+=(-u "${blitz_service}")
done

# systemd state: unit status plus the last five minutes of journal output.
run_capture "${incident_dir}/systemctl-status.txt" \
  systemctl status blitz-robot.target "${blitz_services[@]}"
run_capture "${incident_dir}/journal.txt" \
  journalctl --no-pager --since "5 minutes ago" "${journal_unit_flags[@]}"

# Network state: addresses, routes and socket listings.
run_capture "${incident_dir}/ip-addr.txt" ip addr
run_capture "${incident_dir}/ip-route.txt" ip route
run_capture "${incident_dir}/ss-uapn.txt" ss -uapn
run_capture "${incident_dir}/ss-xlp.txt" ss -xlp

# Copy the 5G info JSON when the variable names an existing file.
info_json="${BLITZ_5G_INFO_JSON:-}"
if [[ -f "${info_json}" ]]; then
  cp -f "${info_json}" "${incident_dir}/$(basename "${info_json}")"
fi

# Keep the last 200 lines of every *.jsonl file directly inside the run
# directory (no recursion); each tail is saved as "tail-<original name>".
run_dir="${BLITZ_RUN_DIR:-}"
if [[ -n "${run_dir}" && -d "${run_dir}" ]]; then
  # NUL-delimited find output keeps unusual file names intact.
  while IFS= read -r -d '' jsonl_path; do
    tail -n 200 "${jsonl_path}" >"${incident_dir}/tail-${jsonl_path##*/}"
  done < <(find "${run_dir}" -maxdepth 1 -type f -name '*.jsonl' -print0 2>/dev/null)
fi

# Report where the capture was written.
blitz_log "${STEP}" "complete" "success" "incident_id=${incident_id} path=${incident_dir}" 0