Mitra ping: decouple stale-after from app cadence

Splits the single mitra_ping_interval_seconds config (which conflated
"how often the app pings" with "how long until offline" through a
hidden ×3 multiplier) into two orthogonal knobs:

- mitra_stale_after_seconds (CC-tunable, app_config DB row): the
  operator-facing offline threshold. What you set is what you get —
  no multiplier. Default 45s (preserves today's effective grace at
  the legacy 15s ping default).
- MITRA_HEARTBEAT_CADENCE_SECONDS (env var, default 30s): how often
  the mitra app sends a heartbeat. Backend-fixed per deployment;
  surfaced to the mitra app via /api/mitra/status.

Backend:
- config.service: getMitraPingConfig returns the new tuple
  {require_ping, stale_after_seconds, heartbeat_cadence_seconds}.
  Env parser falls back to 30 when the value is blank, non-numeric, or < 5.
- mitra-status.service::autoOfflineStaleMitras drops the *3 and uses
  stale_after_seconds directly.
- mitra-status.service::getStatus returns heartbeat_cadence_seconds
  instead of ping_interval_seconds.
- /internal/config/mitra-ping PATCH validates
  stale_after_seconds >= cadence, returns 422 with a clear message
  ("stale_after_seconds must be a number >= heartbeat cadence (30s)").
- migrate.js: adds mitra_stale_after_seconds default 45. The old
  mitra_ping_interval_seconds key is left in place (vestigial) —
  no live code reads it; safe to drop after one release.

Mitra app:
- status_notifier reads heartbeat_cadence_seconds, uses it directly
  as the Timer.periodic interval. Defaults to 30s if missing (older
  backend safety).

Control center:
- SettingsPage: renames "Interval Ping" → "Ambang offline", input
  min={heartbeat_cadence_seconds}, shows the cadence as a read-only
  value with explanation that it's env-controlled.

Verified end-to-end on dev backend:
- GET /api/mitra/status returns {…, heartbeat_cadence_seconds: 30}
- GET /internal/config/mitra-ping returns {require_ping,
  stale_after_seconds: 45, heartbeat_cadence_seconds: 30}
- PATCH with stale_after_seconds=20 → 422 with cadence message
- PATCH with stale_after_seconds=120 → 200, persisted
- Env override parses correctly: =60 → 60; blank and "foo" → 30 fallback.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-21 20:39:59 +08:00
parent 1653482d54
commit a8c20d929e
6 changed files with 80 additions and 22 deletions

View File

@@ -295,6 +295,18 @@ const migrate = async () => {
ON CONFLICT (key) DO NOTHING
`
// Mitra reachability — replaces the implicit `ping_interval * 3` grace
// window with an operator-facing "max heartbeat age" knob. The companion
// heartbeat cadence lives in env (MITRA_HEARTBEAT_CADENCE_SECONDS, default
// 30s). Default 45s keeps the same effective grace as the old 15s ping × 3.
// `mitra_ping_interval_seconds` is left in place (vestigial) — no live code
// path reads it anymore; safe to drop after one release.
await sql`
INSERT INTO app_config (key, value)
VALUES ('mitra_stale_after_seconds', '{"value": 45}')
ON CONFLICT (key) DO NOTHING
`
// --- Phase 3.2: Mitra Request Activity Log ---
await sql`

View File

@@ -9,7 +9,7 @@ import {
getFreeTrialConfig, setFreeTrialConfig,
getExtensionTimeoutConfig, setExtensionTimeoutConfig,
getEarlyEndConfig, setEarlyEndConfig,
getMitraPingConfig, setMitraPingConfig,
getMitraPingConfig, setMitraPingConfig, getMitraHeartbeatCadenceSeconds,
getSensitivityConfig, setSensitivityConfig,
getPaymentSessionTimeoutMinutes, setPaymentSessionTimeoutMinutes,
getReturningChatConfirmationTimeoutSeconds, setReturningChatConfirmationTimeoutSeconds,
@@ -173,14 +173,23 @@ export const internalConfigRoutes = async (app) => {
app.patch('/mitra-ping', {
preHandler: [authenticate, attachCcUser, requirePermission('config', 'update')],
}, async (request, reply) => {
const { require_ping, ping_interval_seconds } = request.body ?? {}
const { require_ping, stale_after_seconds } = request.body ?? {}
if (require_ping !== undefined && typeof require_ping !== 'boolean') {
return reply.code(422).send({ success: false, error: { code: 'VALIDATION_ERROR', message: 'require_ping must be a boolean' } })
}
if (ping_interval_seconds !== undefined && (typeof ping_interval_seconds !== 'number' || ping_interval_seconds < 5)) {
return reply.code(422).send({ success: false, error: { code: 'VALIDATION_ERROR', message: 'ping_interval_seconds must be a number >= 5' } })
if (stale_after_seconds !== undefined) {
const cadence = getMitraHeartbeatCadenceSeconds()
if (typeof stale_after_seconds !== 'number' || stale_after_seconds < cadence) {
return reply.code(422).send({
success: false,
error: {
code: 'VALIDATION_ERROR',
message: `stale_after_seconds must be a number >= heartbeat cadence (${cadence}s)`,
},
})
}
}
const config = await setMitraPingConfig({ require_ping, ping_interval_seconds })
const config = await setMitraPingConfig({ require_ping, stale_after_seconds })
return reply.send({ success: true, data: config })
})

View File

@@ -128,18 +128,38 @@ export const getEarlyEndConfig = async () => {
}
}
// --- Phase 3.1: Mitra Ping Config ---
// --- Mitra reachability config ---
//
// Two separate concerns, deliberately decoupled:
// - heartbeat_cadence_seconds: how often the mitra app sends a heartbeat.
// Fixed per backend deployment via the MITRA_HEARTBEAT_CADENCE_SECONDS
// env (default 30). The mitra app reads this from /api/mitra/status and
// uses it directly as its Timer.periodic interval.
// - stale_after_seconds: how long the backend tolerates silence before
// marking a mitra offline. DB-stored, CC-tunable. Must be >= the
// heartbeat cadence (CC PATCH validates this).
//
// `require_ping` stays as the master switch — when false, the auto-offline
// sweep is skipped entirely and mitras stay online forever once they toggle.
/**
 * Resolve the mitra heartbeat cadence from the environment.
 *
 * Reads MITRA_HEARTBEAT_CADENCE_SECONDS and parses it as a base-10 integer.
 * Note that Number.parseInt accepts a leading integer prefix, so "45s"
 * yields 45 and "7.9" yields 7.
 *
 * @returns {number} The parsed cadence in seconds, or 30 when the variable
 *   is unset, blank, not parseable as an integer, or lower than 5.
 */
export const getMitraHeartbeatCadenceSeconds = () => {
  const fallbackSeconds = 30
  const minimumSeconds = 5

  const envValue = process.env.MITRA_HEARTBEAT_CADENCE_SECONDS
  // Unset, empty, or whitespace-only: nothing to parse.
  if (envValue === undefined || envValue.trim().length === 0) {
    return fallbackSeconds
  }

  const seconds = Number.parseInt(envValue, 10)
  // isFinite rejects NaN (unparseable) as well as Infinity (absurdly long
  // digit strings); anything under the minimum is also treated as invalid.
  if (!Number.isFinite(seconds) || seconds < minimumSeconds) {
    return fallbackSeconds
  }
  return seconds
}
export const getMitraPingConfig = async () => {
const [requireRow] = await sql`SELECT value FROM app_config WHERE key = 'require_mitra_ping'`
const [intervalRow] = await sql`SELECT value FROM app_config WHERE key = 'mitra_ping_interval_seconds'`
const [staleRow] = await sql`SELECT value FROM app_config WHERE key = 'mitra_stale_after_seconds'`
return {
require_ping: requireRow?.value?.value ?? true,
ping_interval_seconds: intervalRow?.value?.value ?? 15,
stale_after_seconds: staleRow?.value?.value ?? 45,
heartbeat_cadence_seconds: getMitraHeartbeatCadenceSeconds(),
}
}
export const setMitraPingConfig = async ({ require_ping, ping_interval_seconds }) => {
export const setMitraPingConfig = async ({ require_ping, stale_after_seconds }) => {
if (require_ping !== undefined) {
await sql`
INSERT INTO app_config (key, value, updated_at)
@@ -147,10 +167,10 @@ export const setMitraPingConfig = async ({ require_ping, ping_interval_seconds }
ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value, updated_at = NOW()
`
}
if (ping_interval_seconds !== undefined) {
if (stale_after_seconds !== undefined) {
await sql`
INSERT INTO app_config (key, value, updated_at)
VALUES ('mitra_ping_interval_seconds', ${sql.json({ value: ping_interval_seconds })}, NOW())
VALUES ('mitra_stale_after_seconds', ${sql.json({ value: stale_after_seconds })}, NOW())
ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value, updated_at = NOW()
`
}

View File

@@ -96,7 +96,9 @@ export const getStatus = async (mitraId) => {
return {
...status,
require_ping: pingConfig.require_ping,
ping_interval_seconds: pingConfig.ping_interval_seconds,
// The app reads this to set its Timer.periodic interval. Backend-fixed
// (via env), not operator-tunable.
heartbeat_cadence_seconds: pingConfig.heartbeat_cadence_seconds,
}
}
@@ -134,7 +136,12 @@ export const autoOfflineStaleMitras = async () => {
// If ping is not required, skip the auto-offline sweep entirely
if (!pingConfig.require_ping) return 0
const staleSeconds = pingConfig.ping_interval_seconds * 3
// stale_after_seconds is the operator-facing knob — what they set is what
// they get. No multiplier, no implicit "tolerate N missed heartbeats"
// contract baked in. The CC PATCH only validates that the value is >= the
// env-driven heartbeat cadence; a threshold below 2× the cadence can still
// flip a mitra offline after a single missed heartbeat.
const staleSeconds = pingConfig.stale_after_seconds
const stale = await sql`
UPDATE mitra_online_status
SET is_online = false, last_offline_at = NOW(), updated_at = NOW()