feat: 增加链路统计信息,两个链路分别显示在前端

This commit is contained in:
Mock
2026-04-09 13:38:47 +08:00
parent ec025f1c5c
commit 497a28c1b2
7 changed files with 838 additions and 87 deletions

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
import os import os
import struct import struct
from datetime import UTC, datetime from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@@ -32,7 +32,7 @@ ZERO_CONTROL_PAYLOAD = CONTROL_PACKET.pack(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
def utc_iso_now() -> str: def utc_iso_now() -> str:
return datetime.now(UTC).isoformat(timespec="seconds").replace("+00:00", "Z") return datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
def parse_simple_yaml_scalar(value: str) -> Any: def parse_simple_yaml_scalar(value: str) -> Any:
@@ -97,6 +97,8 @@ def load_omnisocket_config() -> dict[str, Any]:
video_receiver_cfg = dict(config.get("video_receiver", {})) video_receiver_cfg = dict(config.get("video_receiver", {}))
control_sender_cfg = dict(config.get("control_sender", {})) control_sender_cfg = dict(config.get("control_sender", {}))
control_ingress_cfg = dict(config.get("control_ingress", {})) control_ingress_cfg = dict(config.get("control_ingress", {}))
video_sender_cfg = dict(config.get("video_sender", {}))
telemetry_receiver_cfg = dict(config.get("telemetry_receiver", {}))
transport_cfg["server_addr"] = os.getenv( transport_cfg["server_addr"] = os.getenv(
"OMNISOCKET_SERVER_ADDR", "OMNISOCKET_SERVER_ADDR",
@@ -135,6 +137,15 @@ def load_omnisocket_config() -> dict[str, Any]:
str(control_sender_cfg.get("target_peer", "peer-b-ctrl")), str(control_sender_cfg.get("target_peer", "peer-b-ctrl")),
) )
video_sender_cfg["peer_id"] = os.getenv(
"OMNISOCKET_VIDEO_SENDER_PEER_ID",
str(video_sender_cfg.get("peer_id", "peer-b-video")),
)
video_sender_cfg["target_peer"] = os.getenv(
"OMNISOCKET_VIDEO_TARGET_PEER_ID",
str(video_sender_cfg.get("target_peer", "peer-a-video")),
)
control_ingress_cfg["native_udp_bind"] = os.getenv( control_ingress_cfg["native_udp_bind"] = os.getenv(
"OMNISOCKET_CONTROL_NATIVE_UDP_BIND", "OMNISOCKET_CONTROL_NATIVE_UDP_BIND",
str(control_ingress_cfg.get("native_udp_bind", "127.0.0.1:10921")), str(control_ingress_cfg.get("native_udp_bind", "127.0.0.1:10921")),
@@ -158,11 +169,30 @@ def load_omnisocket_config() -> dict[str, Any]:
) )
) )
telemetry_receiver_cfg["peer_id"] = os.getenv(
"OMNISOCKET_TELEMETRY_PEER_ID",
str(telemetry_receiver_cfg.get("peer_id", "peer-a-telemetry")),
)
telemetry_receiver_cfg["interval_ms"] = int(
os.getenv(
"OMNISOCKET_TELEMETRY_INTERVAL_MS",
str(telemetry_receiver_cfg.get("interval_ms", 500)),
)
)
telemetry_receiver_cfg["stale_after_ms"] = int(
os.getenv(
"OMNISOCKET_TELEMETRY_STALE_AFTER_MS",
str(telemetry_receiver_cfg.get("stale_after_ms", telemetry_receiver_cfg["interval_ms"] * 3)),
)
)
return { return {
"transport": transport_cfg, "transport": transport_cfg,
"video_receiver": video_receiver_cfg, "video_receiver": video_receiver_cfg,
"control_sender": control_sender_cfg, "control_sender": control_sender_cfg,
"control_ingress": control_ingress_cfg, "control_ingress": control_ingress_cfg,
"video_sender": video_sender_cfg,
"telemetry_receiver": telemetry_receiver_cfg,
} }

View File

@@ -1,12 +1,13 @@
from __future__ import annotations from __future__ import annotations
from .control import ControlArbiter, NativeUdpControlIngress, OmniSocketControlSender from .control import ControlArbiter, NativeUdpControlIngress, OmniSocketControlSender
from .telemetry import GpsDataService, NetworkTelemetryService from .telemetry import GpsDataService, HubTelemetryReceiver, NetworkTelemetryService
from .video import OmniSocketVideoReceiver, VideoFrameService from .video import OmniSocketVideoReceiver, VideoFrameService
_video_receiver = OmniSocketVideoReceiver() _video_receiver = OmniSocketVideoReceiver()
_control_sender = OmniSocketControlSender() _control_sender = OmniSocketControlSender()
_hub_telemetry_receiver = HubTelemetryReceiver()
control_arbiter = ControlArbiter(_control_sender) control_arbiter = ControlArbiter(_control_sender)
native_control_ingress = NativeUdpControlIngress(control_arbiter) native_control_ingress = NativeUdpControlIngress(control_arbiter)
@@ -18,5 +19,6 @@ network_service = NetworkTelemetryService(
_control_sender, _control_sender,
control_arbiter, control_arbiter,
native_control_ingress, native_control_ingress,
_hub_telemetry_receiver,
) )

View File

@@ -1,17 +1,54 @@
from __future__ import annotations from __future__ import annotations
from collections import deque
import json import json
import math import math
import sys
import threading import threading
import time import time
from datetime import UTC, datetime from datetime import datetime, timezone
from typing import Any from typing import Any
from .common import GEOSTREAM_JSON_PATH, GEOSTREAM_STALE_SECONDS, utc_iso_now from .common import (
GEOSTREAM_JSON_PATH,
GEOSTREAM_STALE_SECONDS,
WORKSPACE_ROOT,
load_omnisocket_config,
utc_iso_now,
)
from .control import ControlArbiter, NativeUdpControlIngress, OmniSocketControlSender from .control import ControlArbiter, NativeUdpControlIngress, OmniSocketControlSender
from .video import OmniSocketVideoReceiver from .video import OmniSocketVideoReceiver
LOCAL_SAMPLE_INTERVAL_MS = 500
TREND_HISTORY_SIZE = 10
TREND_WINDOW_SIZE = 5
def _utc_from_epoch(epoch_seconds: float | None) -> str | None:
if epoch_seconds is None or epoch_seconds <= 0.0:
return None
return datetime.fromtimestamp(epoch_seconds, timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
def _coerce_int(value: Any, default: int = 0) -> int:
try:
if value is None:
return default
return int(value)
except (TypeError, ValueError):
return default
def _coerce_float(value: Any, default: float = 0.0) -> float:
try:
if value is None:
return default
return float(value)
except (TypeError, ValueError):
return default
class GpsDataService: class GpsDataService:
def get_latest(self) -> dict[str, Any]: def get_latest(self) -> dict[str, Any]:
payload = self._read_geostream_payload() payload = self._read_geostream_payload()
@@ -43,7 +80,7 @@ class GpsDataService:
return { return {
"has_fix": True, "has_fix": True,
"utc_time": datetime.now(UTC).strftime("%H:%M:%S"), "utc_time": datetime.now(timezone.utc).strftime("%H:%M:%S"),
"latitude": round(latitude, 6), "latitude": round(latitude, 6),
"longitude": round(longitude, 6), "longitude": round(longitude, 6),
"satellites": 14 + int((math.sin(tick * 0.7) + 1.0) * 2), "satellites": 14 + int((math.sin(tick * 0.7) + 1.0) * 2),
@@ -56,6 +93,255 @@ class GpsDataService:
} }
class KcpTrendTracker:
    """Keep a short per-key history of normalized KCP statistics and derive trends.

    Every key owns a bounded deque holding at most ``TREND_HISTORY_SIZE``
    samples.  ``describe`` compares a current reading against that history to
    produce per-interval deltas and a rising/falling/stable classification.
    """

    # Integer fields emitted before the window-derived values.
    _LEADING_INT_FIELDS = (
        "connected",
        "conv",
        "rto_ms",
        "srtt_ms",
        "srttvar_ms",
        "snd_wnd",
        "rmt_wnd",
        "inflight",
    )
    # Integer fields emitted after ``window_pressure_pct``.
    _TRAILING_INT_FIELDS = (
        "snd_queue",
        "rcv_queue",
        "snd_buffer",
        "out_segs_total",
        "retrans_total",
        "fast_retrans_total",
        "lost_total",
        "repeat_total",
        "xmit_total",
    )
    # Fields whose increase since the previous sample is reported as a delta.
    _DELTA_FIELDS = (
        "snd_queue",
        "snd_buffer",
        "retrans_total",
        "fast_retrans_total",
        "lost_total",
        "repeat_total",
        "out_segs_total",
    )

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._samples: dict[str, deque[dict[str, Any]]] = {}

    def _normalize(self, stats: dict[str, Any] | None) -> dict[str, Any]:
        """Return a dict with every known KCP field coerced to a number.

        Missing or unparsable fields become ``0``.  ``window_limit`` falls back
        to the smaller of the send/remote windows when both are non-zero.
        """
        raw = dict(stats or {})
        normalized: dict[str, Any] = {
            name: _coerce_int(raw.get(name)) for name in self._LEADING_INT_FIELDS
        }
        snd_wnd = normalized["snd_wnd"]
        rmt_wnd = normalized["rmt_wnd"]
        if snd_wnd and rmt_wnd:
            fallback_limit = min(snd_wnd, rmt_wnd)
        else:
            fallback_limit = 0
        normalized["window_limit"] = _coerce_int(raw.get("window_limit"), fallback_limit)
        normalized["window_pressure_pct"] = round(_coerce_float(raw.get("window_pressure_pct")), 3)
        for name in self._TRAILING_INT_FIELDS:
            normalized[name] = _coerce_int(raw.get(name))
        return normalized

    def add_sample(self, key: str, stats: dict[str, Any] | None) -> None:
        """Append a normalized, timestamped sample to the history for *key*."""
        captured_at = time.monotonic()
        wall_clock = utc_iso_now()
        entry = {
            "ts_monotonic": captured_at,
            "updated_at": wall_clock,
            "stats": self._normalize(stats),
        }
        with self._lock:
            if key not in self._samples:
                self._samples[key] = deque(maxlen=TREND_HISTORY_SIZE)
            self._samples[key].append(entry)

    def latest_updated_at(self, key: str) -> str | None:
        """Return the ``updated_at`` text of the newest sample for *key*, if any."""
        with self._lock:
            history = self._samples.get(key)
            if history:
                return str(history[-1].get("updated_at") or "")
            return None

    def describe(self, key: str, current_stats: dict[str, Any] | None) -> dict[str, Any]:
        """Summarize *current_stats* against the stored history for *key*.

        Returns:
            ``{"kcp": <normalized current stats>, "trend": {...}}`` where the
            trend holds non-negative deltas versus the most recent stored
            sample, queue/buffer trend labels, and the retransmission share of
            newly sent segments as a percentage.
        """
        current = self._normalize(current_stats)
        with self._lock:
            recorded = list(self._samples.get(key, ()))

        # The current reading is treated as the newest point on the timeline.
        timeline = [*recorded, {"stats": current, "updated_at": utc_iso_now()}]
        previous = None if len(timeline) < 2 else timeline[-2]["stats"]
        window = [entry["stats"] for entry in timeline[-TREND_WINDOW_SIZE:]]
        # Changes smaller than 5% of the window limit (minimum 2) count as noise.
        deadband = max(2.0, 0.05 * float(max(current.get("window_limit", 0), 1)))

        deltas = dict.fromkeys(self._DELTA_FIELDS, 0)
        if previous is not None:
            for field in self._DELTA_FIELDS:
                deltas[field] = max(0, current[field] - _coerce_int(previous.get(field)))

        def direction(field: str) -> str:
            if len(window) < 2:
                return "stable"
            newest = float(_coerce_int(window[-1].get(field)))
            oldest = float(_coerce_int(window[0].get(field)))
            change = newest - oldest
            if abs(change) < deadband:
                return "stable"
            if change > 0:
                return "rising"
            return "falling"

        retrans_delta = deltas["retrans_total"]
        out_segs_delta = deltas["out_segs_total"]
        if out_segs_delta > 0:
            repair_rate_pct = round((retrans_delta / out_segs_delta) * 100.0, 3)
        else:
            repair_rate_pct = 0.0

        trend = {
            "snd_queue_delta": deltas["snd_queue"],
            "snd_buffer_delta": deltas["snd_buffer"],
            "snd_queue_trend": direction("snd_queue"),
            "snd_buffer_trend": direction("snd_buffer"),
            "retrans_delta": retrans_delta,
            "fast_retrans_delta": deltas["fast_retrans_total"],
            "lost_delta": deltas["lost_total"],
            "repeat_delta": deltas["repeat_total"],
            "out_segs_delta": out_segs_delta,
            "repair_rate_pct": repair_rate_pct,
        }
        return {"kcp": current, "trend": trend}
class HubTelemetryReceiver:
    """Background receiver for hub KCP telemetry snapshots sent over OmniSocket.

    A daemon thread connects a session using the ``telemetry_receiver`` peer id
    from the OmniSocket config, remembers the most recent ``hub_kcp_snapshot``
    text message, and reconnects after any session failure.  ``get_snapshot``
    exposes the latest snapshot together with freshness and error details.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._thread: threading.Thread | None = None
        self._started = False
        self._session = None
        self._session_cls = None
        self._msg_type_text = None
        self._msg_type_error = None
        self._telemetry_defaults: dict[str, Any] = {}
        self._latest_snapshot: dict[str, Any] | None = None
        self._last_error = ""
        self._last_received_wall = 0.0
        self._last_received_monotonic = 0.0
        self._load_backend()

    def _load_backend(self) -> None:
        """Try to import the omnisocket backend; record the failure instead of raising."""
        try:
            self._import_backend()
        except Exception as error:  # pragma: no cover - optional runtime dependency
            self._last_error = f"omnisocket import failed: {error}"

    def _import_backend(self) -> None:
        """Import omnisocket, retrying from the workspace checkout when not installed."""
        try:
            from omnisocket import MSG_TYPE_ERROR, MSG_TYPE_TEXT, Session, TELEMETRY_DEFAULTS  # type: ignore
        except ImportError:
            python_dir = WORKSPACE_ROOT / "OmniSocketGo" / "python"
            if python_dir.exists():
                sys.path.insert(0, str(python_dir))
            from omnisocket import MSG_TYPE_ERROR, MSG_TYPE_TEXT, Session, TELEMETRY_DEFAULTS  # type: ignore

        self._msg_type_error = MSG_TYPE_ERROR
        self._msg_type_text = MSG_TYPE_TEXT
        self._session_cls = Session
        self._telemetry_defaults = dict(TELEMETRY_DEFAULTS)

    def _connect_session(self):
        """Create and connect a new session from the current config.

        Returns:
            The connected session object.

        Raises:
            RuntimeError: If the omnisocket backend was never imported.
            Exception: Whatever ``connect`` raises; the half-built session is
                closed before the error propagates.
        """
        if self._session_cls is None:
            # Explicit check rather than ``assert`` so it survives ``python -O``.
            raise RuntimeError("omnisocket backend is not available")
        config = load_omnisocket_config()
        transport_cfg = config.get("transport", {})
        telemetry_cfg = config.get("telemetry_receiver", {})
        session = self._session_cls()
        try:
            session.connect(
                server_addr=str(transport_cfg.get("server_addr", "127.0.0.1:10909")),
                peer_id=str(telemetry_cfg.get("peer_id", "peer-a-telemetry")),
                relay_via=str(transport_cfg.get("relay_via", "")),
                bind_ip=str(transport_cfg.get("bind_ip", "")),
                bind_device=str(transport_cfg.get("bind_device", "")),
                **self._telemetry_defaults,
            )
        except Exception:
            # The caller never sees this session, so it must be released here.
            try:
                session.close()
            except Exception:
                pass
            raise
        return session

    def ensure_started(self) -> None:
        """Start the receiver thread once; a no-op when the backend is unavailable."""
        if self._session_cls is None:
            return
        with self._lock:
            if self._started:
                return
            self._started = True
            self._thread = threading.Thread(
                target=self._run,
                name="hub-telemetry-receiver",
                daemon=True,
            )
            self._thread.start()

    def _handle_message(self, from_peer: Any, msg_type: Any, payload: Any) -> None:
        """Process one received message without ever tearing down the session.

        Error messages update ``_last_error``; text messages carrying a
        ``hub_kcp_snapshot`` object replace the latest snapshot.  Undecodable or
        non-object payloads are recorded/ignored so that a single bad message
        does not force a reconnect.
        """
        if msg_type == self._msg_type_error:
            detail = payload.decode("utf-8", errors="replace")
            with self._lock:
                self._last_error = f"hub error from {from_peer}: {detail}"
            return
        if msg_type != self._msg_type_text:
            return

        try:
            snapshot = json.loads(payload.decode("utf-8"))
        except ValueError as error:
            # UnicodeDecodeError and json.JSONDecodeError are both ValueErrors.
            with self._lock:
                self._last_error = f"invalid telemetry payload from {from_peer}: {error}"
            return
        if not isinstance(snapshot, dict):
            return
        if snapshot.get("type") != "hub_kcp_snapshot":
            return

        now_wall = time.time()
        now_mono = time.monotonic()
        with self._lock:
            self._latest_snapshot = snapshot
            self._last_received_wall = now_wall
            self._last_received_monotonic = now_mono
            self._last_error = ""

    def _run(self) -> None:
        """Thread body: connect, receive until failure, then retry after a pause."""
        while True:
            try:
                session = self._connect_session()
                with self._lock:
                    self._session = session
                    self._last_error = ""

                while True:
                    result = session.recv(timeout_ms=1000)
                    if result is None:
                        continue
                    from_peer, msg_type, payload = result
                    self._handle_message(from_peer, msg_type, payload)
            except Exception as error:  # pragma: no cover - runtime integration path
                with self._lock:
                    self._last_error = str(error)
            finally:
                with self._lock:
                    session = self._session
                    self._session = None
                if session is not None:
                    try:
                        session.close()
                    except Exception:
                        pass
            # Pause before reconnecting.
            time.sleep(2)

    def get_snapshot(self) -> dict[str, Any]:
        """Return the latest hub snapshot plus connection and freshness state.

        The snapshot is reported stale when none has been received yet or when
        the newest one is older than ``stale_after_ms`` (at least 500 ms).
        """
        self.ensure_started()
        cfg = load_omnisocket_config().get("telemetry_receiver", {})
        stale_after_ms = max(500, int(cfg.get("stale_after_ms", 1500)))
        with self._lock:
            received_monotonic = self._last_received_monotonic
            received_wall = self._last_received_wall
            snapshot = self._latest_snapshot
            connected = self._session is not None
            last_error = self._last_error

        stale = True
        if received_monotonic > 0.0:
            stale = (time.monotonic() - received_monotonic) * 1000.0 > stale_after_ms

        return {
            "connected": connected,
            "updated_at": _utc_from_epoch(received_wall),
            "received_at_monotonic": received_monotonic,
            "stale": stale,
            "peer_id": str(cfg.get("peer_id", "peer-a-telemetry")),
            "snapshot": snapshot or {"sessions": []},
            "last_error": last_error,
        }
class NetworkTelemetryService: class NetworkTelemetryService:
def __init__( def __init__(
self, self,
@@ -63,13 +349,45 @@ class NetworkTelemetryService:
control_sender: OmniSocketControlSender, control_sender: OmniSocketControlSender,
control_arbiter: ControlArbiter, control_arbiter: ControlArbiter,
native_ingress: NativeUdpControlIngress, native_ingress: NativeUdpControlIngress,
hub_receiver: HubTelemetryReceiver,
) -> None: ) -> None:
self._video_receiver = video_receiver self._video_receiver = video_receiver
self._control_sender = control_sender self._control_sender = control_sender
self._control_arbiter = control_arbiter self._control_arbiter = control_arbiter
self._native_ingress = native_ingress self._native_ingress = native_ingress
self._hub_receiver = hub_receiver
self._trend_tracker = KcpTrendTracker()
self._rate_lock = threading.Lock() self._rate_lock = threading.Lock()
self._last_rate_sample: tuple[float, int, int] | None = None self._last_rate_sample: tuple[float, int, int] | None = None
self._sample_thread: threading.Thread | None = None
self._sample_started = False
self._last_remote_snapshot_at = 0.0
def _ensure_started(self) -> None:
self._video_receiver.ensure_started()
self._control_arbiter.ensure_started()
self._native_ingress.ensure_started()
self._hub_receiver.ensure_started()
with self._rate_lock:
if self._sample_started:
return
self._sample_started = True
self._sample_thread = threading.Thread(
target=self._sample_loop,
name="network-telemetry-sampler",
daemon=True,
)
self._sample_thread.start()
def _sample_loop(self) -> None:
    """Thread body: record local KCP stats for both A-side sessions forever.

    Every ``LOCAL_SAMPLE_INTERVAL_MS`` the video receiver and control sender
    are sampled into the trend tracker.  Each source is sampled independently
    so a failure in one does not suppress the other, and failures are logged
    at debug level instead of being silently discarded.
    """
    import logging  # function-scope import keeps the file's import block untouched

    logger = logging.getLogger(__name__)
    interval_seconds = LOCAL_SAMPLE_INTERVAL_MS / 1000.0
    sources = (
        ("a_to_d.video", self._video_receiver),
        ("a_to_d.control", self._control_sender),
    )
    while True:
        for trend_key, endpoint in sources:
            try:
                self._trend_tracker.add_sample(trend_key, endpoint.session_kcp_stats())
            except Exception:
                # Best effort: keep the sampler alive, but leave a trace.
                logger.debug("KCP stats sampling failed for %s", trend_key, exc_info=True)
        time.sleep(interval_seconds)
def _compute_rates(self, send_bytes: int, recv_bytes: int) -> tuple[float, float]: def _compute_rates(self, send_bytes: int, recv_bytes: int) -> tuple[float, float]:
now = time.monotonic() now = time.monotonic()
@@ -89,10 +407,85 @@ class NetworkTelemetryService:
rx_kbps = max(0.0, ((recv_bytes - prev_recv) * 8.0) / elapsed / 1000.0) rx_kbps = max(0.0, ((recv_bytes - prev_recv) * 8.0) / elapsed / 1000.0)
return tx_kbps, rx_kbps return tx_kbps, rx_kbps
def _ingest_remote_snapshot(self, telemetry_state: dict[str, Any]) -> None:
received_at = float(telemetry_state.get("received_at_monotonic") or 0.0)
if received_at <= 0.0 or received_at <= self._last_remote_snapshot_at:
return
snapshot = telemetry_state.get("snapshot") or {}
sessions = snapshot.get("sessions") or []
for session in sessions:
peer_id = str(session.get("peer_id", "")).strip()
if not peer_id:
continue
self._trend_tracker.add_sample(f"hub::{peer_id}", session)
self._last_remote_snapshot_at = received_at
def _build_session_payload(
self,
trend_key: str,
peer_id: str,
app_stats: dict[str, Any] | None,
current_kcp: dict[str, Any] | None,
updated_at: str | None,
stale: bool,
) -> dict[str, Any]:
described = self._trend_tracker.describe(trend_key, current_kcp)
return {
"peer_id": peer_id,
"connected": bool(described["kcp"].get("connected")),
"updated_at": updated_at,
"stale": stale,
"app": app_stats,
"kcp": described["kcp"],
"trend": described["trend"],
}
def _build_link(self, source: str, updated_at: str | None, stale: bool, sessions: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Wrap one leg's sessions with aggregate figures over its live sessions.

    A session counts as live when it is connected and not stale; only live
    sessions contribute to the aggregate values.
    """
    live = [
        entry
        for entry in sessions.values()
        if entry.get("connected") and not entry.get("stale")
    ]

    def total(section: str, field: str) -> int:
        # Sum one integer field of a nested section across the live sessions.
        return sum(_coerce_int(entry.get(section, {}).get(field)) for entry in live)

    retrans_sum = total("trend", "retrans_delta")
    out_segs_sum = total("trend", "out_segs_delta")
    if out_segs_sum > 0:
        repair_rate_pct = round((retrans_sum / out_segs_sum) * 100.0, 3)
    else:
        repair_rate_pct = 0.0

    peak_pressure = max(
        (_coerce_float(entry.get("kcp", {}).get("window_pressure_pct")) for entry in live),
        default=0.0,
    )
    aggregate = {
        "online_sessions": len(live),
        "max_window_pressure_pct": peak_pressure,
        "sum_snd_queue": total("kcp", "snd_queue"),
        "sum_snd_buffer": total("kcp", "snd_buffer"),
        "sum_retrans_delta": retrans_sum,
        "sum_out_segs_delta": out_segs_sum,
        "repair_rate_pct": repair_rate_pct,
    }
    return {
        "source": source,
        "updated_at": updated_at,
        "stale": stale,
        "aggregate": aggregate,
        "sessions": sessions,
    }
def _pick_primary_session(self, links: dict[str, dict[str, Any]]) -> dict[str, Any] | None:
candidates = (
links["a_to_d"]["sessions"]["control"],
links["a_to_d"]["sessions"]["video"],
links["d_to_b"]["sessions"]["control"],
links["d_to_b"]["sessions"]["video"],
)
for session in candidates:
if session.get("connected") and not session.get("stale"):
return session
return None
def get_latest(self) -> dict[str, Any]: def get_latest(self) -> dict[str, Any]:
self._video_receiver.ensure_started() self._ensure_started()
self._control_arbiter.ensure_started()
self._native_ingress.ensure_started() config = load_omnisocket_config()
video_receiver_cfg = config.get("video_receiver", {})
control_sender_cfg = config.get("control_sender", {})
video_sender_cfg = config.get("video_sender", {})
video_app = self._video_receiver.session_stats() video_app = self._video_receiver.session_stats()
control_app = self._control_sender.session_stats() control_app = self._control_sender.session_stats()
@@ -101,20 +494,75 @@ class NetworkTelemetryService:
arbiter_status = self._control_arbiter.get_status() arbiter_status = self._control_arbiter.get_status()
ingress_status = self._native_ingress.get_status() ingress_status = self._native_ingress.get_status()
sender_status = self._control_sender.get_status() sender_status = self._control_sender.get_status()
telemetry_state = self._hub_receiver.get_snapshot()
total_send_bytes = int(video_app.get("send_bytes", 0)) + int(control_app.get("send_bytes", 0)) total_send_bytes = int(video_app.get("send_bytes", 0)) + int(control_app.get("send_bytes", 0))
total_recv_bytes = int(video_app.get("recv_bytes", 0)) + int(control_app.get("recv_bytes", 0)) total_recv_bytes = int(video_app.get("recv_bytes", 0)) + int(control_app.get("recv_bytes", 0))
tx_kbps, rx_kbps = self._compute_rates(total_send_bytes, total_recv_bytes) tx_kbps, rx_kbps = self._compute_rates(total_send_bytes, total_recv_bytes)
video_connected = int(video_app.get("connected", 0)) local_updated_at = utc_iso_now()
control_connected = int(control_app.get("connected", 0)) local_sessions = {
connected_sessions = video_connected + control_connected "video": self._build_session_payload(
"a_to_d.video",
str(video_receiver_cfg.get("peer_id", "peer-a-video")),
video_app,
video_kcp,
local_updated_at,
False,
),
"control": self._build_session_payload(
"a_to_d.control",
str(control_sender_cfg.get("peer_id", "peer-a-ctrl")),
control_app,
control_kcp,
local_updated_at,
False,
),
}
primary_kcp = control_kcp if control_connected else video_kcp remote_snapshot = telemetry_state.get("snapshot") or {}
latency_ms = primary_kcp.get("srtt_ms") remote_sessions_by_peer = {
jitter_ms = primary_kcp.get("srttvar_ms") str(session.get("peer_id", "")).strip(): session
for session in remote_snapshot.get("sessions", []) or []
if str(session.get("peer_id", "")).strip()
}
remote_updated_at = telemetry_state.get("updated_at")
remote_stale = bool(telemetry_state.get("stale", True))
remote_sessions = {
"video": self._build_session_payload(
f"hub::{str(video_sender_cfg.get('peer_id', 'peer-b-video'))}",
str(video_sender_cfg.get("peer_id", "peer-b-video")),
None,
remote_sessions_by_peer.get(str(video_sender_cfg.get("peer_id", "peer-b-video")), {}),
remote_updated_at,
remote_stale,
),
"control": self._build_session_payload(
f"hub::{str(control_sender_cfg.get('target_peer', 'peer-b-ctrl'))}",
str(control_sender_cfg.get("target_peer", "peer-b-ctrl")),
None,
remote_sessions_by_peer.get(str(control_sender_cfg.get("target_peer", "peer-b-ctrl")), {}),
remote_updated_at,
remote_stale,
),
}
if connected_sessions > 0: links = {
"a_to_d": self._build_link("local-a-side", local_updated_at, False, local_sessions),
"d_to_b": self._build_link("hub-telemetry", remote_updated_at, remote_stale, remote_sessions),
}
primary_session = self._pick_primary_session(links)
primary_kcp = dict(primary_session.get("kcp", {})) if primary_session is not None else {}
self._ingest_remote_snapshot(telemetry_state)
fresh_connected_sessions = (
links["a_to_d"]["aggregate"]["online_sessions"] + links["d_to_b"]["aggregate"]["online_sessions"]
)
latency_ms = primary_kcp.get("srtt_ms") if primary_session is not None else None
jitter_ms = primary_kcp.get("srttvar_ms") if primary_session is not None else None
if fresh_connected_sessions > 0:
peer_status = "online" peer_status = "online"
elif sender_status.get("backend_ready"): elif sender_status.get("backend_ready"):
peer_status = "idle" peer_status = "idle"
@@ -129,12 +577,12 @@ class NetworkTelemetryService:
"tx_kbps": round(tx_kbps, 3), "tx_kbps": round(tx_kbps, 3),
"rx_kbps": round(rx_kbps, 3), "rx_kbps": round(rx_kbps, 3),
"transport": "OmniSocket / kcp", "transport": "OmniSocket / kcp",
"source_mode": "omnisocket-live" if connected_sessions > 0 else "omnisocket-idle", "source_mode": "omnisocket-live" if fresh_connected_sessions > 0 else "omnisocket-idle",
"updated_at": utc_iso_now(), "updated_at": utc_iso_now(),
"active_control_source": arbiter_status["active_source"], "active_control_source": arbiter_status["active_source"],
"control_lease_remaining_ms": arbiter_status["control_lease_remaining_ms"], "control_lease_remaining_ms": arbiter_status["control_lease_remaining_ms"],
"combined": { "combined": {
"connected_sessions": connected_sessions, "connected_sessions": fresh_connected_sessions,
"send_bytes": total_send_bytes, "send_bytes": total_send_bytes,
"recv_bytes": total_recv_bytes, "recv_bytes": total_recv_bytes,
"tx_kbps": round(tx_kbps, 3), "tx_kbps": round(tx_kbps, 3),
@@ -143,13 +591,21 @@ class NetworkTelemetryService:
"sessions": { "sessions": {
"video": { "video": {
"app": video_app, "app": video_app,
"kcp": video_kcp, "kcp": local_sessions["video"]["kcp"],
}, },
"control": { "control": {
"app": control_app, "app": control_app,
"kcp": control_kcp, "kcp": local_sessions["control"]["kcp"],
}, },
}, },
"links": links,
"telemetry_receiver": {
"hub_connected": bool(telemetry_state.get("connected")),
"hub_updated_at": telemetry_state.get("updated_at"),
"hub_stale": remote_stale,
"last_error": telemetry_state.get("last_error", ""),
"peer_id": telemetry_state.get("peer_id", ""),
},
"ingress": { "ingress": {
"native_udp": ingress_status, "native_udp": ingress_status,
}, },
@@ -158,4 +614,3 @@ class NetworkTelemetryService:
"sender": sender_status, "sender": sender_status,
}, },
} }

View File

@@ -18,6 +18,11 @@ control_ingress:
send_rate_hz: 20.0 send_rate_hz: 20.0
zero_burst_packets: 3 zero_burst_packets: 3
telemetry_receiver:
peer_id: "peer-a-telemetry"
interval_ms: 500
stale_after_ms: 1500
video_sender: video_sender:
peer_id: "peer-b-video" peer_id: "peer-b-video"
target_peer: "peer-a-video" target_peer: "peer-a-video"

View File

@@ -1,20 +1,47 @@
<script setup lang="ts"> <script setup lang="ts">
import { computed } from 'vue' import { computed } from 'vue'
import type { NetworkTelemetry } from '@/types' import type { LinkSessionTelemetry, LinkTelemetry, NetworkTelemetry } from '@/types'
const props = defineProps<{ const props = defineProps<{
network: NetworkTelemetry | null network: NetworkTelemetry | null
}>() }>()
const updatedAt = computed(() => { const legCards = computed(() => [
if (!props.network?.updated_at) { {
return 'unavailable' key: 'a_to_d',
} label: 'A <-> D',
return new Date(props.network.updated_at).toLocaleString('zh-CN', { hour12: false }) data: props.network?.links?.a_to_d ?? null,
}) },
{
key: 'd_to_b',
label: 'D <-> B',
data: props.network?.links?.d_to_b ?? null,
},
])
const activeSource = computed(() => props.network?.active_control_source ?? 'none') const activeSource = computed(() => props.network?.active_control_source ?? 'none')
/**
 * Format an ISO timestamp for display using the zh-CN locale and a 24-hour clock.
 * Returns 'unavailable' when the value is missing or cannot be parsed as a date.
 */
function formatTime(value?: string | null) {
  if (!value) {
    return 'unavailable'
  }
  const parsed = new Date(value)
  // An unparseable string yields a Date whose time value is NaN; without this
  // guard the panel would show the literal text "Invalid Date".
  if (Number.isNaN(parsed.getTime())) {
    return 'unavailable'
  }
  return parsed.toLocaleString('zh-CN', { hour12: false })
}
/**
 * Render a scalar with an optional unit suffix.
 * Null, undefined and the empty string are shown as the '--' placeholder;
 * zero is a real value and is rendered normally.
 */
function formatScalar(value?: number | string | null, suffix = '') {
  const missing = value === null || value === undefined || value === ''
  return missing ? '--' : `${value}${suffix}`
}
/**
 * List the sessions of one link leg in display order: control first, then video.
 * A missing link or missing session is represented by `data: null`.
 */
function legSessions(link: LinkTelemetry | null): Array<{ name: string; data: LinkSessionTelemetry | null }> {
  const sessions = link?.sessions
  const control = sessions?.control ?? null
  const video = sessions?.video ?? null
  return [
    { name: 'control', data: control },
    { name: 'video', data: video },
  ]
}
</script> </script>
<template> <template>
@@ -22,19 +49,21 @@ const activeSource = computed(() => props.network?.active_control_source ?? 'non
<div class="panel-head"> <div class="panel-head">
<div> <div>
<p class="eyebrow">Network</p> <p class="eyebrow">Network</p>
<h2>Session Telemetry</h2> <h2>Dual-Leg Telemetry</h2>
</div> </div>
<span class="badge">{{ network?.peer_status ?? 'loading' }}</span> <span class="badge" :class="{ stale: network?.telemetry_receiver?.hub_stale }">
{{ network?.peer_status ?? 'loading' }}
</span>
</div> </div>
<div class="stats"> <div class="stats">
<div class="stat-card"> <div class="stat-card">
<span>Latency</span> <span>Latency</span>
<strong>{{ network?.latency_ms ?? '--' }} ms</strong> <strong>{{ formatScalar(network?.latency_ms, ' ms') }}</strong>
</div> </div>
<div class="stat-card"> <div class="stat-card">
<span>Jitter</span> <span>Jitter</span>
<strong>{{ network?.jitter_ms ?? '--' }} ms</strong> <strong>{{ formatScalar(network?.jitter_ms, ' ms') }}</strong>
</div> </div>
<div class="stat-card"> <div class="stat-card">
<span>Active Control</span> <span>Active Control</span>
@@ -42,42 +71,108 @@ const activeSource = computed(() => props.network?.active_control_source ?? 'non
</div> </div>
<div class="stat-card"> <div class="stat-card">
<span>Lease</span> <span>Lease</span>
<strong>{{ network?.control_lease_remaining_ms ?? '--' }} ms</strong> <strong>{{ formatScalar(network?.control_lease_remaining_ms, ' ms') }}</strong>
</div> </div>
<div class="stat-card"> <div class="stat-card">
<span>TX Rate</span> <span>TX Rate</span>
<strong>{{ network?.tx_kbps ?? '--' }} kbps</strong> <strong>{{ formatScalar(network?.tx_kbps, ' kbps') }}</strong>
</div> </div>
<div class="stat-card"> <div class="stat-card">
<span>RX Rate</span> <span>RX Rate</span>
<strong>{{ network?.rx_kbps ?? '--' }} kbps</strong> <strong>{{ formatScalar(network?.rx_kbps, ' kbps') }}</strong>
</div> </div>
</div> </div>
<div class="summary"> <div class="summary telemetry-strip">
<p><strong>Transport:</strong> {{ network?.transport ?? 'n/a' }} / {{ network?.source_mode ?? 'n/a' }}</p> <p><strong>Transport:</strong> {{ network?.transport ?? 'n/a' }} / {{ network?.source_mode ?? 'n/a' }}</p>
<p><strong>Combined:</strong> sessions={{ network?.combined?.connected_sessions ?? '--' }} send={{ network?.combined?.send_bytes ?? '--' }}B recv={{ network?.combined?.recv_bytes ?? '--' }}B</p> <p><strong>Telemetry Peer:</strong> {{ network?.telemetry_receiver?.peer_id ?? 'n/a' }}</p>
<p><strong>Refresh:</strong> {{ updatedAt }}</p> <p><strong>Hub Freshness:</strong> {{ formatTime(network?.telemetry_receiver?.hub_updated_at) }}</p>
<p><strong>Hub State:</strong> {{ network?.telemetry_receiver?.hub_stale ? 'stale' : 'fresh' }}</p>
<p v-if="network?.telemetry_receiver?.last_error"><strong>Hub Error:</strong> {{ network?.telemetry_receiver?.last_error }}</p>
</div>
<div class="leg-grid">
<article v-for="leg in legCards" :key="leg.key" class="leg-card" :class="{ stale: leg.data?.stale }">
<div class="leg-head">
<div>
<p class="leg-label">{{ leg.label }}</p>
<h3>{{ leg.data?.source ?? 'waiting' }}</h3>
</div>
<div class="leg-meta">
<span class="mini-badge" :class="{ stale: leg.data?.stale }">
{{ leg.data?.stale ? 'stale' : 'fresh' }}
</span>
<span class="mini-time">{{ formatTime(leg.data?.updated_at) }}</span>
</div>
</div>
<div class="aggregate-grid">
<div>
<span>Online</span>
<strong>{{ leg.data?.aggregate?.online_sessions ?? 0 }}</strong>
</div>
<div>
<span>Max Pressure</span>
<strong>{{ formatScalar(leg.data?.aggregate?.max_window_pressure_pct, '%') }}</strong>
</div>
<div>
<span>Queued</span>
<strong>{{ leg.data?.aggregate?.sum_snd_queue ?? 0 }}</strong>
</div>
<div>
<span>In Flight Buffer</span>
<strong>{{ leg.data?.aggregate?.sum_snd_buffer ?? 0 }}</strong>
</div>
<div>
<span>Retrans Delta</span>
<strong>{{ leg.data?.aggregate?.sum_retrans_delta ?? 0 }}</strong>
</div>
<div>
<span>Repair Rate</span>
<strong>{{ formatScalar(leg.data?.aggregate?.repair_rate_pct, '%') }}</strong>
</div>
</div> </div>
<div class="session-grid"> <div class="session-grid">
<div class="session-card"> <section v-for="session in legSessions(leg.data)" :key="session.name" class="session-card">
<h3>Video Session</h3> <div class="session-head">
<p>connected={{ network?.sessions?.video?.app?.connected ?? 0 }}</p> <div>
<p>recv_bytes={{ network?.sessions?.video?.app?.recv_bytes ?? 0 }}</p> <p class="session-label">{{ session.name }}</p>
<p>srtt={{ network?.sessions?.video?.kcp?.srtt_ms ?? '--' }} ms</p> <h4>{{ session.data?.peer_id ?? 'unassigned' }}</h4>
<p>snd_queue={{ network?.sessions?.video?.kcp?.snd_queue ?? '--' }}</p>
</div> </div>
<div class="session-card"> <span class="mini-badge" :class="{ stale: session.data?.stale, active: session.data?.connected }">
<h3>Control Session</h3> {{ session.data?.connected ? 'online' : 'idle' }}
<p>connected={{ network?.sessions?.control?.app?.connected ?? 0 }}</p> </span>
<p>send_bytes={{ network?.sessions?.control?.app?.send_bytes ?? 0 }}</p>
<p>srtt={{ network?.sessions?.control?.kcp?.srtt_ms ?? '--' }} ms</p>
<p>snd_queue={{ network?.sessions?.control?.kcp?.snd_queue ?? '--' }}</p>
</div> </div>
<div class="kv-grid">
<p><strong>Updated:</strong> {{ formatTime(session.data?.updated_at) }}</p>
<p><strong>SRTT:</strong> {{ formatScalar(session.data?.kcp?.srtt_ms, ' ms') }}</p>
<p><strong>RTTVAR:</strong> {{ formatScalar(session.data?.kcp?.srttvar_ms, ' ms') }}</p>
<p><strong>RTO:</strong> {{ formatScalar(session.data?.kcp?.rto_ms, ' ms') }}</p>
<p><strong>SND WND:</strong> {{ formatScalar(session.data?.kcp?.snd_wnd) }}</p>
<p><strong>RMT WND:</strong> {{ formatScalar(session.data?.kcp?.rmt_wnd) }}</p>
<p><strong>Inflight:</strong> {{ formatScalar(session.data?.kcp?.inflight) }}</p>
<p><strong>Window Limit:</strong> {{ formatScalar(session.data?.kcp?.window_limit) }}</p>
<p><strong>Pressure:</strong> {{ formatScalar(session.data?.kcp?.window_pressure_pct, '%') }}</p>
<p><strong>SND Queue:</strong> {{ formatScalar(session.data?.kcp?.snd_queue) }} / {{ session.data?.trend?.snd_queue_trend ?? 'stable' }}</p>
<p><strong>SND Buffer:</strong> {{ formatScalar(session.data?.kcp?.snd_buffer) }} / {{ session.data?.trend?.snd_buffer_trend ?? 'stable' }}</p>
<p><strong>Queue Delta:</strong> {{ formatScalar(session.data?.trend?.snd_queue_delta) }}</p>
<p><strong>Buffer Delta:</strong> {{ formatScalar(session.data?.trend?.snd_buffer_delta) }}</p>
<p><strong>Retrans:</strong> {{ formatScalar(session.data?.trend?.retrans_delta) }}</p>
<p><strong>Fast Retrans:</strong> {{ formatScalar(session.data?.trend?.fast_retrans_delta) }}</p>
<p><strong>Lost:</strong> {{ formatScalar(session.data?.trend?.lost_delta) }}</p>
<p><strong>Repeat:</strong> {{ formatScalar(session.data?.trend?.repeat_delta) }}</p>
<p><strong>Repair Rate:</strong> {{ formatScalar(session.data?.trend?.repair_rate_pct, '%') }}</p>
<p v-if="session.data?.app"><strong>App Bytes:</strong> tx={{ session.data.app.send_bytes ?? 0 }} / rx={{ session.data.app.recv_bytes ?? 0 }}</p>
</div>
</section>
</div>
</article>
</div> </div>
<div class="summary"> <div class="summary">
<p><strong>Combined:</strong> sessions={{ network?.combined?.connected_sessions ?? 0 }} send={{ network?.combined?.send_bytes ?? 0 }}B recv={{ network?.combined?.recv_bytes ?? 0 }}B</p>
<p><strong>Native UDP:</strong> {{ network?.ingress?.native_udp?.bind_addr ?? 'n/a' }} packets={{ network?.ingress?.native_udp?.packets_received ?? 0 }} invalid={{ network?.ingress?.native_udp?.invalid_packets ?? 0 }}</p> <p><strong>Native UDP:</strong> {{ network?.ingress?.native_udp?.bind_addr ?? 'n/a' }} packets={{ network?.ingress?.native_udp?.packets_received ?? 0 }} invalid={{ network?.ingress?.native_udp?.invalid_packets ?? 0 }}</p>
<p><strong>Control Sender:</strong> {{ network?.control?.sender?.peer_id ?? 'n/a' }} -> {{ network?.control?.sender?.target_peer ?? 'n/a' }} sends={{ network?.control?.sender?.send_count ?? 0 }}</p> <p><strong>Control Sender:</strong> {{ network?.control?.sender?.peer_id ?? 'n/a' }} -> {{ network?.control?.sender?.target_peer ?? 'n/a' }} sends={{ network?.control?.sender?.send_count ?? 0 }}</p>
</div> </div>
@@ -87,98 +182,192 @@ const activeSource = computed(() => props.network?.active_control_source ?? 'non
<style scoped> <style scoped>
.network-panel { .network-panel {
display: grid; display: grid;
gap: 16px; gap: 18px;
} }
.panel-head { .panel-head,
.leg-head,
.session-head {
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;
gap: 12px; gap: 12px;
align-items: start; align-items: start;
} }
.eyebrow { .eyebrow,
.leg-label,
.session-label {
margin: 0 0 4px; margin: 0 0 4px;
color: #4dd4ac; color: #5bd3b5;
text-transform: uppercase; text-transform: uppercase;
letter-spacing: 0.12em; letter-spacing: 0.14em;
font-size: 12px; font-size: 12px;
font-weight: 700; font-weight: 700;
} }
h2 { h2,
h3,
h4 {
margin: 0; margin: 0;
}
h2 {
font-size: 24px; font-size: 24px;
} }
h3 {
font-size: 22px;
}
h4 {
font-size: 16px;
}
.badge,
.mini-badge {
border-radius: 999px;
text-transform: uppercase;
font-weight: 700;
}
.badge { .badge {
padding: 8px 12px; padding: 8px 12px;
border-radius: 999px;
background: rgba(40, 199, 111, 0.16); background: rgba(40, 199, 111, 0.16);
color: #63e6a9; color: #63e6a9;
font-size: 12px; font-size: 12px;
font-weight: 700; }
text-transform: uppercase;
.mini-badge {
padding: 6px 10px;
background: rgba(91, 211, 181, 0.12);
color: #8ff2db;
font-size: 11px;
}
.badge.stale,
.mini-badge.stale {
background: rgba(255, 165, 0, 0.16);
color: #ffd08a;
}
.mini-badge.active {
background: rgba(64, 187, 255, 0.16);
color: #98dcff;
}
.stats,
.leg-grid,
.session-grid,
.aggregate-grid,
.kv-grid {
display: grid;
gap: 12px;
} }
.stats { .stats {
display: grid;
grid-template-columns: repeat(3, minmax(0, 1fr)); grid-template-columns: repeat(3, minmax(0, 1fr));
gap: 12px; }
.leg-grid {
grid-template-columns: repeat(2, minmax(0, 1fr));
}
.session-grid {
grid-template-columns: repeat(2, minmax(0, 1fr));
}
.aggregate-grid {
grid-template-columns: repeat(3, minmax(0, 1fr));
}
.kv-grid {
grid-template-columns: repeat(2, minmax(0, 1fr));
} }
.stat-card, .stat-card,
.summary, .summary,
.leg-card,
.session-card { .session-card {
padding: 14px; padding: 14px;
border-radius: 16px; border-radius: 18px;
background: rgba(7, 14, 26, 0.78); background: rgba(7, 14, 26, 0.8);
border: 1px solid rgba(133, 147, 169, 0.2); border: 1px solid rgba(133, 147, 169, 0.2);
color: #d5dbee;
} }
.stat-card span { .stat-card span,
.aggregate-grid span {
display: block; display: block;
margin-bottom: 8px; margin-bottom: 8px;
color: #8d99b3; color: #8d99b3;
font-size: 12px; font-size: 12px;
} }
.stat-card strong { .stat-card strong,
.aggregate-grid strong {
font-size: 22px; font-size: 22px;
} }
.summary,
.session-card {
color: #d5dbee;
}
.summary p, .summary p,
.session-card h3, .kv-grid p {
.session-card p {
margin: 0; margin: 0;
} }
.summary p + p, .summary p + p {
.session-card p + p {
margin-top: 8px; margin-top: 8px;
} }
.session-grid { .telemetry-strip {
display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 10px;
}
.leg-card {
display: grid;
gap: 16px;
}
.leg-card.stale {
border-color: rgba(255, 165, 0, 0.3);
}
.leg-meta {
display: grid;
justify-items: end;
gap: 8px;
}
.mini-time {
color: #9aa6c2;
font-size: 12px;
}
.session-card {
display: grid; display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 12px; gap: 12px;
background: rgba(11, 19, 35, 0.86);
} }
@media (max-width: 960px) { @media (max-width: 1200px) {
.stats, .stats,
.session-grid { .aggregate-grid,
.telemetry-strip {
grid-template-columns: repeat(2, minmax(0, 1fr)); grid-template-columns: repeat(2, minmax(0, 1fr));
} }
.leg-grid,
.session-grid,
.kv-grid {
grid-template-columns: 1fr;
}
} }
@media (max-width: 640px) { @media (max-width: 720px) {
.stats, .stats,
.session-grid { .aggregate-grid,
.telemetry-strip,
.kv-grid {
grid-template-columns: 1fr; grid-template-columns: 1fr;
} }
} }

View File

@@ -29,9 +29,19 @@ export interface SessionKcpStats {
rto_ms?: number rto_ms?: number
srtt_ms?: number srtt_ms?: number
srttvar_ms?: number srttvar_ms?: number
snd_wnd?: number
rmt_wnd?: number
inflight?: number
window_limit?: number
window_pressure_pct?: number
snd_queue?: number snd_queue?: number
rcv_queue?: number rcv_queue?: number
snd_buffer?: number snd_buffer?: number
out_segs_total?: number
retrans_total?: number
fast_retrans_total?: number
lost_total?: number
repeat_total?: number
xmit_total?: number xmit_total?: number
} }
@@ -40,6 +50,50 @@ export interface SessionTelemetry {
kcp: SessionKcpStats kcp: SessionKcpStats
} }
/**
 * Trend and delta figures reported for a single session.
 *
 * NOTE(review): the `*_delta` fields look like per-sample changes of the
 * matching KCP counters, and the `*_trend` fields look like a direction
 * label — confirm both against the backend that produces this payload.
 */
export interface SessionTrendStats {
snd_queue_delta: number
snd_buffer_delta: number
// Trend labels are plain strings; the set of allowed values is not declared here.
snd_queue_trend: string
snd_buffer_trend: string
retrans_delta: number
fast_retrans_delta: number
lost_delta: number
repeat_delta: number
out_segs_delta: number
// Presumably a percentage (0-100) given the `_pct` suffix — TODO confirm the range.
repair_rate_pct: number
}
/**
 * Telemetry for one session on a link: peer identity, connection and
 * staleness flags, optional application-level stats, KCP stats, and
 * trend figures.
 */
export interface LinkSessionTelemetry {
peer_id: string
connected: boolean
// Nullable timestamp string; presumably null until a first sample arrives — TODO confirm.
updated_at: string | null
stale: boolean
// Application-level stats may be absent, hence the explicit null.
app: SessionAppStats | null
kcp: SessionKcpStats
trend: SessionTrendStats
}
/**
 * Aggregate figures for one link.
 *
 * NOTE(review): the `sum_*` and `max_*` prefixes suggest values combined
 * across the link's sessions — confirm how the backend computes them.
 */
export interface LinkAggregateTelemetry {
online_sessions: number
max_window_pressure_pct: number
sum_snd_queue: number
sum_snd_buffer: number
sum_retrans_delta: number
sum_out_segs_delta: number
// Presumably a percentage given the `_pct` suffix — TODO confirm the range.
repair_rate_pct: number
}
/**
 * Telemetry for one link: where the data came from, its freshness,
 * link-wide aggregate figures, and the two fixed sessions
 * (`control` and `video`).
 */
export interface LinkTelemetry {
// Free-form string; the set of possible sources is not declared here.
source: string
// Nullable timestamp string; presumably null until data has been received — TODO confirm.
updated_at: string | null
stale: boolean
aggregate: LinkAggregateTelemetry
// Exactly two sessions are modelled per link, keyed by role.
sessions: {
control: LinkSessionTelemetry
video: LinkSessionTelemetry
}
}
export interface NativeUdpIngress { export interface NativeUdpIngress {
started: boolean started: boolean
bind_addr: string bind_addr: string
@@ -72,6 +126,14 @@ export interface ControlSenderStatus {
last_error: string last_error: string
} }
/**
 * Status of the telemetry receiver: hub connection flag, last hub update
 * time, staleness flag, last error text, and the receiver's peer id.
 */
export interface TelemetryReceiverStatus {
hub_connected: boolean
// Nullable timestamp string; presumably null until the hub has reported once — TODO confirm.
hub_updated_at: string | null
hub_stale: boolean
// NOTE(review): likely an empty string when there is no error — confirm with the backend.
last_error: string
peer_id: string
}
export interface NetworkTelemetry { export interface NetworkTelemetry {
peer_status: string peer_status: string
latency_ms: number | null latency_ms: number | null
@@ -95,6 +157,11 @@ export interface NetworkTelemetry {
video: SessionTelemetry video: SessionTelemetry
control: SessionTelemetry control: SessionTelemetry
} }
links: {
a_to_d: LinkTelemetry
d_to_b: LinkTelemetry
}
telemetry_receiver: TelemetryReceiverStatus
ingress: { ingress: {
native_udp: NativeUdpIngress native_udp: NativeUdpIngress
} }

View File

@@ -2,7 +2,9 @@
import NetworkPanel from '@/components/NetworkPanel.vue' import NetworkPanel from '@/components/NetworkPanel.vue'
import { useMonitoringData } from '@/composables/useMonitoringData' import { useMonitoringData } from '@/composables/useMonitoringData'
const { network, errorMessage, headerStatus } = useMonitoringData() const { network, errorMessage, headerStatus } = useMonitoringData({
refreshIntervalMs: 500,
})
</script> </script>
<template> <template>
@@ -13,8 +15,9 @@ const { network, errorMessage, headerStatus } = useMonitoringData()
<h1>Network Telemetry</h1> <h1>Network Telemetry</h1>
</div> </div>
<p class="description"> <p class="description">
Live per-session OmniSocket telemetry from the unified A-side daemon, including active control Live dual-leg OmniSocket telemetry from the A-side daemon, separating the local `A <-> D`
source and native UDP ingress status. sessions from the hub-reported `D <-> B` leg with queue pressure, retransmission, and stale-link
visibility.
</p> </p>
</header> </header>