feat: 长保持连接,控制端可重启
This commit is contained in:
@@ -214,6 +214,9 @@ class HubTelemetryReceiver:
|
||||
self._last_error = ""
|
||||
self._last_received_wall = 0.0
|
||||
self._last_received_monotonic = 0.0
|
||||
self._reconnect_count = 0
|
||||
self._ever_connected = False
|
||||
self._closing = threading.Event()
|
||||
self._load_backend()
|
||||
|
||||
def _load_backend(self) -> None:
|
||||
@@ -259,7 +262,7 @@ class HubTelemetryReceiver:
|
||||
return
|
||||
|
||||
with self._lock:
|
||||
if self._started:
|
||||
if self._started or self._closing.is_set():
|
||||
return
|
||||
self._started = True
|
||||
self._thread = threading.Thread(
|
||||
@@ -270,14 +273,18 @@ class HubTelemetryReceiver:
|
||||
self._thread.start()
|
||||
|
||||
def _run(self) -> None:
|
||||
while True:
|
||||
while not self._closing.is_set():
|
||||
try:
|
||||
session = self._connect_session()
|
||||
with self._lock:
|
||||
self._session = session
|
||||
self._last_error = ""
|
||||
if self._ever_connected:
|
||||
self._reconnect_count += 1
|
||||
else:
|
||||
self._ever_connected = True
|
||||
|
||||
while True:
|
||||
while not self._closing.is_set():
|
||||
result = session.recv(timeout_ms=1000)
|
||||
if result is None:
|
||||
continue
|
||||
@@ -302,18 +309,28 @@ class HubTelemetryReceiver:
|
||||
self._last_received_monotonic = now_mono
|
||||
self._last_error = ""
|
||||
except Exception as error: # pragma: no cover - runtime integration path
|
||||
with self._lock:
|
||||
self._last_error = str(error)
|
||||
if not self._closing.is_set():
|
||||
session_error = ""
|
||||
if self._session is not None:
|
||||
try:
|
||||
session_error = str(dict(self._session.stats()).get("last_server_error", "") or "")
|
||||
except Exception:
|
||||
session_error = ""
|
||||
with self._lock:
|
||||
self._last_error = session_error or str(error)
|
||||
finally:
|
||||
with self._lock:
|
||||
session = self._session
|
||||
self._session = None
|
||||
if self._closing.is_set():
|
||||
self._started = False
|
||||
if session is not None:
|
||||
try:
|
||||
session.close()
|
||||
except Exception:
|
||||
pass
|
||||
time.sleep(2)
|
||||
if not self._closing.is_set():
|
||||
time.sleep(2)
|
||||
|
||||
def get_snapshot(self) -> dict[str, Any]:
|
||||
self.ensure_started()
|
||||
@@ -326,6 +343,14 @@ class HubTelemetryReceiver:
|
||||
snapshot = self._latest_snapshot
|
||||
connected = self._session is not None
|
||||
last_error = self._last_error
|
||||
reconnect_count = self._reconnect_count
|
||||
if self._session is not None:
|
||||
try:
|
||||
session_stats = dict(self._session.stats())
|
||||
except Exception:
|
||||
session_stats = {}
|
||||
else:
|
||||
session_stats = {}
|
||||
|
||||
stale = True
|
||||
if received_monotonic > 0.0:
|
||||
@@ -339,8 +364,24 @@ class HubTelemetryReceiver:
|
||||
"peer_id": str(cfg.get("peer_id", "peer-a-telemetry")),
|
||||
"snapshot": snapshot or {"sessions": []},
|
||||
"last_error": last_error,
|
||||
"registered": bool(session_stats.get("registered", 0)),
|
||||
"last_server_error": str(session_stats.get("last_server_error", "") or ""),
|
||||
"reconnect_count": reconnect_count,
|
||||
}
|
||||
|
||||
def close(self) -> None:
# Shut the receiver down: raise the closing flag, close the current session
# (if any), then wait briefly for the worker thread to exit.
# NOTE(review): indentation is missing in this view, so the exact nesting of
# the statements below (in particular which of them run while the lock is
# held) cannot be confirmed from here.
|
||||
self._closing.set()  # the loops in `_run` poll this event through is_set()
|
||||
with self._lock:
|
||||
session = self._session  # take a local reference to the shared session
|
||||
if session is not None:
|
||||
try:
|
||||
session.close()
|
||||
except Exception:
|
||||
# Best-effort: any error raised while closing the session is ignored.
pass
|
||||
thread = self._thread
|
||||
if thread is not None and thread.is_alive():
|
||||
# Bounded wait of 0.5 s; the thread may still be alive when join() returns.
thread.join(timeout=0.5)
|
||||
|
||||
|
||||
class NetworkTelemetryService:
|
||||
def __init__(
|
||||
@@ -362,6 +403,7 @@ class NetworkTelemetryService:
|
||||
self._sample_thread: threading.Thread | None = None
|
||||
self._sample_started = False
|
||||
self._last_remote_snapshot_at = 0.0
|
||||
self._closing = threading.Event()
|
||||
|
||||
def _ensure_started(self) -> None:
|
||||
self._video_receiver.ensure_started()
|
||||
@@ -369,7 +411,7 @@ class NetworkTelemetryService:
|
||||
self._native_ingress.ensure_started()
|
||||
self._hub_receiver.ensure_started()
|
||||
with self._rate_lock:
|
||||
if self._sample_started:
|
||||
if self._sample_started or self._closing.is_set():
|
||||
return
|
||||
self._sample_started = True
|
||||
self._sample_thread = threading.Thread(
|
||||
@@ -381,7 +423,7 @@ class NetworkTelemetryService:
|
||||
|
||||
def _sample_loop(self) -> None:
|
||||
interval_seconds = LOCAL_SAMPLE_INTERVAL_MS / 1000.0
|
||||
while True:
|
||||
while not self._closing.is_set():
|
||||
try:
|
||||
self._trend_tracker.add_sample("a_to_d.video", self._video_receiver.session_kcp_stats())
|
||||
self._trend_tracker.add_sample("a_to_d.control", self._control_sender.session_kcp_stats())
|
||||
@@ -431,9 +473,12 @@ class NetworkTelemetryService:
|
||||
stale: bool,
|
||||
) -> dict[str, Any]:
|
||||
described = self._trend_tracker.describe(trend_key, current_kcp)
|
||||
connected = bool(described["kcp"].get("connected"))
|
||||
if app_stats is not None and "registered" in app_stats:
|
||||
connected = bool(app_stats.get("registered"))
|
||||
return {
|
||||
"peer_id": peer_id,
|
||||
"connected": bool(described["kcp"].get("connected")),
|
||||
"connected": connected,
|
||||
"updated_at": updated_at,
|
||||
"stale": stale,
|
||||
"app": app_stats,
|
||||
@@ -561,9 +606,13 @@ class NetworkTelemetryService:
|
||||
)
|
||||
latency_ms = primary_kcp.get("srtt_ms") if primary_session is not None else None
|
||||
jitter_ms = primary_kcp.get("srttvar_ms") if primary_session is not None else None
|
||||
local_control_registered = bool(control_app.get("registered", 0))
|
||||
remote_control_fresh = bool(remote_sessions["control"].get("connected")) and not bool(remote_sessions["control"].get("stale"))
|
||||
|
||||
if fresh_connected_sessions > 0:
|
||||
if local_control_registered and remote_control_fresh:
|
||||
peer_status = "online"
|
||||
elif local_control_registered or bool(local_sessions["video"].get("connected")):
|
||||
peer_status = "degraded"
|
||||
elif sender_status.get("backend_ready"):
|
||||
peer_status = "idle"
|
||||
else:
|
||||
@@ -605,6 +654,9 @@ class NetworkTelemetryService:
|
||||
"hub_stale": remote_stale,
|
||||
"last_error": telemetry_state.get("last_error", ""),
|
||||
"peer_id": telemetry_state.get("peer_id", ""),
|
||||
"registered": bool(telemetry_state.get("registered", False)),
|
||||
"last_server_error": str(telemetry_state.get("last_server_error", "") or ""),
|
||||
"reconnect_count": int(telemetry_state.get("reconnect_count", 0)),
|
||||
},
|
||||
"ingress": {
|
||||
"native_udp": ingress_status,
|
||||
@@ -614,3 +666,9 @@ class NetworkTelemetryService:
|
||||
"sender": sender_status,
|
||||
},
|
||||
}
|
||||
|
||||
def close(self) -> None:
# Stop the background sampling: raise the closing flag that `_sample_loop`
# polls in its `while` condition, then wait briefly for the sample thread.
# NOTE(review): nothing in this method closes the receivers started by
# `_ensure_started` (e.g. `self._hub_receiver`) - confirm that they are
# shut down elsewhere.
|
||||
self._closing.set()
|
||||
thread = self._sample_thread
|
||||
if thread is not None and thread.is_alive():
|
||||
# Bounded wait of 0.5 s; the thread may still be alive when join() returns.
thread.join(timeout=0.5)
|
||||
|
||||
Reference in New Issue
Block a user