feat: 增加日志模块

This commit is contained in:
2026-04-18 12:52:32 +08:00
parent 7cd464bc6a
commit 2ca70d556b
15 changed files with 1263 additions and 186 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
import socket
import sys
import threading
@@ -10,6 +11,7 @@ from .common import (
CONTROL_PACKET_SIZE,
CONTROL_SOURCE_NATIVE_UDP,
CONTROL_SOURCE_PRIORITY,
JsonlRunLogger,
ZERO_CONTROL_PAYLOAD,
WORKSPACE_ROOT,
load_omnisocket_config,
@@ -18,9 +20,129 @@ from .common import (
from .video import safe_kcp_stats
class OmniSocketControlSender:
class ControlAckTracker:
    """Match outgoing control packets to their acks and derive latency estimates.

    Every send that carries a message id is remembered in ``_pending`` and
    written to the event log.  When the ack for that id arrives, the
    tracker computes a round-trip time and a one-way estimate, writes an ack
    record, and keeps the newest sample for ``get_latest_estimate``.

    All mutable state is guarded by ``_lock``; logger writes happen outside
    the lock.
    """

    # A pending send whose ack has not arrived within this span is forgotten.
    _PENDING_TTL_NS = 60_000_000_000
    # The latest estimate is reported as unavailable once it is older than this.
    _ESTIMATE_TTL_NS = 10_000_000_000

    def __init__(self) -> None:
        self._lock = threading.Lock()
        # NOTE(review): the two JsonlRunLogger arguments look like an
        # environment-variable name and a default file stem -- confirm against
        # JsonlRunLogger in .common.
        self._event_logger = JsonlRunLogger("BLITZ_A_CONTROL_EVENTS_LOG_PATH", "a-control-events")
        self._ack_logger = JsonlRunLogger("BLITZ_A_CONTROL_ACKS_LOG_PATH", "a-control-acks")
        # message_id -> the event dict recorded by register_send().
        self._pending: dict[int, dict[str, Any]] = {}
        self._latest_estimate: dict[str, Any] = {
            "ack_available": False,
            "updated_at": None,
            "received_mono_ns": 0,
            "control_loop_rtt_ms": None,
            "b_recv_to_persist_ms": None,
            "control_oneway_network_est_ms": None,
            "control_to_persist_est_ms": None,
            "sample_reason": None,
        }

    def register_send(
        self,
        *,
        message_id: int,
        issued_at_unix_ns: int,
        issued_at_mono_ns: int,
        source: str,
        payload: bytes,
        send_call_latency_us: int,
    ) -> None:
        """Remember a sent control packet and append it to the event log.

        Args:
            message_id: Identifier the ack is expected to echo back.
            issued_at_unix_ns: Wall-clock send time in nanoseconds.
            issued_at_mono_ns: Monotonic send time in nanoseconds; used for
                pruning and for the round-trip computation.
            source: Label of the control source that produced the payload.
            payload: Raw packet bytes; logged as a hex string.
            send_call_latency_us: Duration of the send call in microseconds.
        """
        event = {
            "ts_unix_nano": issued_at_unix_ns,
            "message_id": message_id,
            "issued_at_unix_ns": issued_at_unix_ns,
            "issued_at_mono_ns": issued_at_mono_ns,
            "source": source,
            "command_signature": payload.hex(),
            "payload_size": len(payload),
            "send_call_latency_us": send_call_latency_us,
        }
        with self._lock:
            self._pending[message_id] = event
            self._prune_locked(issued_at_mono_ns)
        self._event_logger.write(event)

    def handle_ack(self, ack_payload: dict[str, Any], received_unix_ns: int, received_mono_ns: int) -> None:
        """Process one decoded ack and refresh the latest latency estimate.

        Acks without a usable ``message_id``, or whose id has no pending send,
        are ignored.

        Args:
            ack_payload: Decoded ack message; ``message_id`` is required,
                the other keys read here are optional.
            received_unix_ns: Wall-clock receive time in nanoseconds.
            received_mono_ns: Monotonic receive time in nanoseconds.
        """
        try:
            message_id = int(ack_payload["message_id"])
        except (KeyError, TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")), reachable because json.loads
            # accepts the non-standard ``Infinity`` literal.
            return

        with self._lock:
            event = self._pending.pop(message_id, None)
            self._prune_locked(received_mono_ns)
        if event is None:
            return

        try:
            # Both stamps come from this host's monotonic clock, so the
            # difference is immune to wall-clock steps (NTP, manual changes)
            # that would make a unix-time difference negative or inflated.
            control_loop_rtt_ms = round(
                (received_mono_ns - int(event["issued_at_mono_ns"])) / 1_000_000.0, 3
            )
            # A missing key and an explicit null are both treated as 0.
            b_recv_to_persist_ms = round(
                float(ack_payload.get("b_recv_to_persist_us") or 0) / 1000.0, 3
            )
        except (TypeError, ValueError):
            return

        # Half of the round trip that was not spent on the remote side,
        # clamped so a large remote time cannot produce a negative estimate.
        control_oneway_network_est_ms = round(
            max(0.0, (control_loop_rtt_ms - b_recv_to_persist_ms) / 2.0), 3
        )
        control_to_persist_est_ms = round(control_oneway_network_est_ms + b_recv_to_persist_ms, 3)

        ack_record = {
            "ts_unix_nano": received_unix_ns,
            "received_unix_ns": received_unix_ns,
            "received_mono_ns": received_mono_ns,
            "message_id": message_id,
            "ack_phase": str(ack_payload.get("ack_phase") or "persist_end"),
            "sample_reason": str(ack_payload.get("sample_reason") or ""),
            "b_recv_to_persist_us": ack_payload.get("b_recv_to_persist_us"),
            "unix_send_ok": bool(ack_payload.get("unix_send_ok", False)),
            "issued_at_unix_ns": event["issued_at_unix_ns"],
            "source": event["source"],
            "control_loop_rtt_ms": control_loop_rtt_ms,
            "b_recv_to_persist_ms": b_recv_to_persist_ms,
            "control_oneway_network_est_ms": control_oneway_network_est_ms,
            "control_to_persist_est_ms": control_to_persist_est_ms,
        }
        self._ack_logger.write(ack_record)

        with self._lock:
            self._latest_estimate = {
                "ack_available": True,
                "updated_at": time.strftime(
                    "%Y-%m-%dT%H:%M:%SZ", time.gmtime(received_unix_ns / 1_000_000_000)
                ),
                "received_mono_ns": received_mono_ns,
                "control_loop_rtt_ms": control_loop_rtt_ms,
                "b_recv_to_persist_ms": b_recv_to_persist_ms,
                "control_oneway_network_est_ms": control_oneway_network_est_ms,
                "control_to_persist_est_ms": control_to_persist_est_ms,
                "sample_reason": ack_record["sample_reason"],
            }

    def get_latest_estimate(self) -> dict[str, Any]:
        """Return a copy of the newest estimate, blanked out if it is stale.

        The internal ``received_mono_ns`` field is never included in the
        result.  When the newest sample is older than ``_ESTIMATE_TTL_NS`` the
        measurement fields are set to ``None`` and ``ack_available`` to
        ``False``; ``updated_at`` is left as is.
        """
        with self._lock:
            estimate = dict(self._latest_estimate)

        received_mono_ns = int(estimate.pop("received_mono_ns", 0) or 0)
        if received_mono_ns > 0 and time.monotonic_ns() - received_mono_ns > self._ESTIMATE_TTL_NS:
            estimate["ack_available"] = False
            for key in (
                "control_loop_rtt_ms",
                "b_recv_to_persist_ms",
                "control_oneway_network_est_ms",
                "control_to_persist_est_ms",
                "sample_reason",
            ):
                estimate[key] = None
        return estimate

    def close(self) -> None:
        """Close both loggers; the ack logger is closed even if the first close fails."""
        try:
            self._event_logger.close()
        finally:
            self._ack_logger.close()

    def _prune_locked(self, now_mono_ns: int) -> None:
        """Drop pending sends older than ``_PENDING_TTL_NS``.

        The caller must hold ``_lock``.
        """
        stale_ids = [
            message_id
            for message_id, event in self._pending.items()
            if now_mono_ns - int(event.get("issued_at_mono_ns", 0)) > self._PENDING_TTL_NS
        ]
        for message_id in stale_ids:
            self._pending.pop(message_id, None)
class OmniSocketControlSender:
def __init__(self, ack_tracker: ControlAckTracker) -> None:
self._lock = threading.Lock()
self._ack_tracker = ack_tracker
self._session = None
self._session_cls = None
self._msg_type_error = None
@@ -36,6 +158,7 @@ class OmniSocketControlSender:
self._reconnect_count = 0
self._ever_connected = False
self._registered = False
self._supports_send_with_id = False
self._load_backend()
def _load_backend(self) -> None:
@@ -92,6 +215,7 @@ class OmniSocketControlSender:
self._started = True
self._last_error = ""
self._registered = bool(dict(session.stats()).get("registered", 0))
self._supports_send_with_id = hasattr(session, "send_with_id")
if self._ever_connected:
self._reconnect_count += 1
else:
@@ -111,25 +235,35 @@ class OmniSocketControlSender:
self._session = None
self._started = False
self._registered = False
self._supports_send_with_id = False
if current is not None:
try:
current.close()
except Exception:
pass
def send_payload(self, payload: bytes) -> None:
def send_payload(self, payload: bytes, *, source: str) -> None:
if len(payload) != CONTROL_PACKET_SIZE:
raise ValueError(f"expected {CONTROL_PACKET_SIZE} bytes, got {len(payload)}")
self.ensure_started()
with self._lock:
session = self._session
target_peer = self._target_peer
supports_send_with_id = self._supports_send_with_id
if session is None:
raise RuntimeError("control session is not available")
try:
session.send(to=target_peer, data=payload)
issued_at_unix_ns = time.time_ns()
issued_at_mono_ns = time.monotonic_ns()
send_started_ns = time.perf_counter_ns()
message_id: int | None = None
if supports_send_with_id:
message_id = int(session.send_with_id(to=target_peer, data=payload))
else:
session.send(to=target_peer, data=payload)
send_call_latency_us = max(0, int((time.perf_counter_ns() - send_started_ns) / 1000))
except Exception as error:
with self._lock:
self._send_errors += 1
@@ -137,13 +271,22 @@ class OmniSocketControlSender:
self._reset_session(session)
raise
if message_id is not None:
self._ack_tracker.register_send(
message_id=message_id,
issued_at_unix_ns=issued_at_unix_ns,
issued_at_mono_ns=issued_at_mono_ns,
source=source,
payload=payload,
send_call_latency_us=send_call_latency_us,
)
with self._lock:
self._send_count += 1
def send_zero_burst(self, count: int) -> None:
for _ in range(max(0, count)):
try:
self.send_payload(ZERO_CONTROL_PAYLOAD)
self.send_payload(ZERO_CONTROL_PAYLOAD, source="zero_burst")
except Exception:
return
@@ -236,6 +379,145 @@ class OmniSocketControlSender:
drain_thread.join(timeout=0.5)
class OmniSocketControlAckReceiver:
def __init__(self, ack_tracker: ControlAckTracker) -> None:
self._ack_tracker = ack_tracker
self._lock = threading.Lock()
self._thread: threading.Thread | None = None
self._started = False
self._session = None
self._session_cls = None
self._msg_type_text = None
self._msg_type_error = None
self._control_defaults: dict[str, Any] = {}
self._closing = threading.Event()
self._last_error = ""
self._reconnect_count = 0
self._ever_connected = False
self._load_backend()
def _load_backend(self) -> None:
try:
self._import_backend()
except Exception as error: # pragma: no cover
self._last_error = f"omnisocket import failed: {error}"
def _import_backend(self) -> None:
try:
from omnisocket import CONTROL_DEFAULTS, MSG_TYPE_ERROR, MSG_TYPE_TEXT, Session # type: ignore
except ImportError:
python_dir = WORKSPACE_ROOT / "OmniSocketGo" / "python"
if python_dir.exists():
sys.path.insert(0, str(python_dir))
from omnisocket import CONTROL_DEFAULTS, MSG_TYPE_ERROR, MSG_TYPE_TEXT, Session # type: ignore
self._session_cls = Session
self._msg_type_text = MSG_TYPE_TEXT
self._msg_type_error = MSG_TYPE_ERROR
self._control_defaults = dict(CONTROL_DEFAULTS)
def _connect_session(self):
assert self._session_cls is not None
config = load_omnisocket_config()
transport_cfg = config.get("transport", {})
ack_cfg = config.get("control_ack_receiver", {})
session = self._session_cls()
session.connect(
server_addr=str(transport_cfg.get("server_addr", "127.0.0.1:10909")),
peer_id=str(ack_cfg.get("peer_id", "peer-a-ctrl-ack")),
relay_via=str(transport_cfg.get("relay_via", "")),
bind_ip=str(transport_cfg.get("bind_ip", "")),
bind_device=str(transport_cfg.get("bind_device", "")),
**self._control_defaults,
)
return session, str(ack_cfg.get("expected_sender", "peer-b-ctrl-ack"))
def ensure_started(self) -> None:
if self._session_cls is None:
return
with self._lock:
if self._started or self._closing.is_set():
return
self._started = True
self._thread = threading.Thread(target=self._run, name="omnisocket-control-ack", daemon=True)
self._thread.start()
def _run(self) -> None:
while not self._closing.is_set():
expected_sender = ""
try:
session, expected_sender = self._connect_session()
with self._lock:
self._session = session
self._last_error = ""
if self._ever_connected:
self._reconnect_count += 1
else:
self._ever_connected = True
while not self._closing.is_set():
result = session.recv(timeout_ms=1000)
if result is None:
continue
from_peer, msg_type, payload = result
if msg_type == self._msg_type_error:
with self._lock:
self._last_error = f"ack session error from {from_peer}: {payload.decode('utf-8', errors='replace')}"
continue
if msg_type != self._msg_type_text:
continue
if expected_sender and from_peer != expected_sender:
continue
try:
ack_payload = json.loads(payload.decode("utf-8"))
except (UnicodeDecodeError, json.JSONDecodeError):
continue
self._ack_tracker.handle_ack(ack_payload, time.time_ns(), time.monotonic_ns())
except Exception as error: # pragma: no cover
if not self._closing.is_set():
with self._lock:
self._last_error = str(error)
time.sleep(2)
finally:
if self._session is not None:
try:
self._session.close()
except Exception:
pass
with self._lock:
self._session = None
if self._closing.is_set():
self._started = False
def get_status(self) -> dict[str, Any]:
config = load_omnisocket_config().get("control_ack_receiver", {})
with self._lock:
return {
"backend_ready": self._session_cls is not None,
"started": self._started,
"connected": self._session is not None,
"peer_id": str(config.get("peer_id", "")),
"expected_sender": str(config.get("expected_sender", "")),
"reconnect_count": self._reconnect_count,
"last_error": self._last_error,
}
def close(self) -> None:
self._closing.set()
with self._lock:
session = self._session
if session is not None:
try:
session.close()
except Exception:
pass
thread = self._thread
if thread is not None and thread.is_alive():
thread.join(timeout=0.5)
class ControlArbiter:
def __init__(self, sender: OmniSocketControlSender) -> None:
self._sender = sender
@@ -325,7 +607,7 @@ class ControlArbiter:
self._last_error = str(error)
elif active_source is not None:
try:
self._sender.send_payload(payload)
self._sender.send_payload(payload, source=active_source)
with self._lock:
self._last_sent_at = time.monotonic()
self._last_error = ""