Xendit webhook: metadata.app routing + survival audit log + rolling fallback file

Every Xendit invoice now carries metadata: { app: 'halobestie_v2' } so an
external webhook router (no DB access) can fan out v1/v2 traffic purely off
the echoed payload.

Every inbound webhook lands in a new webhook_logs table BEFORE auth or
business logic, so a forensic row survives 401/409/unknown/exception paths.
Primary fields are parsed as columns; raw_body keeps the full payload
verbatim. The handler captures outcome in closure-scoped vars and stamps
http_status/processing_result/processing_error in a single update before
the lone reply.send() — Fastify flushes reply.send() immediately, which
defeated the original finally-block stamp.

A non-UUID external_id no longer crashes the Postgres cast; it ACKs with
ignored_non_uuid_external_id so Xendit stops retrying legacy old-app IDs.

When the DB log itself fails, an optional rolling JSONL file sink absorbs
the event. Disabled by default — opt in via XENDIT_WEBHOOK_FALLBACK_ENABLED.
Naming: <NAME>-YYYY-MM-DD.jsonl in XENDIT_WEBHOOK_FALLBACK_DIR (default
./logs), basename XENDIT_WEBHOOK_FALLBACK_NAME (default
xendit-webhook-fallback). No stdout fallback by design.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-25 22:09:14 +08:00
parent 553dbac52f
commit 3052f7b799
8 changed files with 783 additions and 63 deletions

View File

@@ -6,87 +6,205 @@
// `x-callback-token` header verified against env XENDIT_WEBHOOK_TOKEN.
//
// Body shape from Xendit Invoice callback (relevant fields only):
// { id, external_id, status, amount, payment_method, paid_at, ... }
// { id, external_id, status, amount, payment_method, paid_at, metadata, ... }
//
// Handled statuses: PAID (→ confirmPayment), EXPIRED (→ expirePayment).
// Anything else ACKs with `{ ok: true, ignored: <status> }` for forward-compat.
//
// All state transitions go through payment.service.js — this handler is just
// the entry point. Events emit from inside the service, not from here.
// SURVIVAL LOGGING — every inbound webhook lands in `webhook_logs` BEFORE the
// auth check or any business logic, so we keep a forensic record even when:
// • the token is wrong (401)
// • the body is malformed / missing external_id
// • the referenced payment_request doesn't exist
// • amount mismatches and we reject (409)
// • downstream processing throws (500)
//
// If the DB insert itself fails, an optional rolling-file sink can absorb the
// event (see XENDIT_WEBHOOK_FALLBACK_* env vars in webhook-log.service.js). The
// sink is disabled by default — production opts in by mounting a persistent
// volume and setting XENDIT_WEBHOOK_FALLBACK_ENABLED=true. We deliberately do
// NOT fall back to stdout; operators decide if/where survival writes happen.
//
// CONTROL FLOW NOTE — branches store `result` + `httpStatus` + `responseBody`
// in closure vars rather than calling `reply.send()` directly. Fastify
// flushes the response as soon as `reply.send()` is called, and `app.inject`
// resolves before a post-send `finally` block would run, so stamping the
// outcome there would lose the outcome columns. Instead we update the log
// once at the end, then send the reply once.
import { confirmPayment, expirePayment, getPayment, verifyWebhookToken } from '../../services/payment.service.js'
import {
insertWebhookLog,
sanitizeHeaders,
updateWebhookLog,
writeWebhookFallback,
} from '../../services/webhook-log.service.js'
// payment_requests.id is a UUID, so an external_id in any other shape cannot
// belong to us: it is either a legacy old-app invoice that the router should
// have filtered out, or a stray Xendit event for a product we don't model.
// Matching against this pattern first lets the handler skip the DB lookup
// (which would throw on the cast) and ACK, so Xendit stops retrying.
// Shape: 8 hex digits, three groups of 4, then 12 — hyphen-separated, any case.
const UUID_RE = /^[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12}$/i
/**
 * Route plugin that registers the Xendit invoice callback endpoint,
 * `POST /webhooks/xendit`, on the given app.
 *
 * The handler reads the `x-callback-token` header and the request body, then
 * replies with an `{ ok: true, ... }` or `{ error }` body depending on the
 * branch taken. Payment state changes are delegated to `confirmPayment` and
 * `expirePayment`; audit rows go through `insertWebhookLog` /
 * `updateWebhookLog`, with `writeWebhookFallback` tried when the insert throws.
 *
 * NOTE(review): this listing appears to carry both the pre-change and the
 * post-change lines of the handler (e.g. `headerToken` is declared twice
 * below, and there are two trailing `return reply...` statements) — confirm
 * which lines are live before editing.
 *
 * @param app - object exposing `post()`; presumably the Fastify instance —
 *   confirm against where this plugin is registered.
 */
export const paymentWebhookRoutes = async (app) => {
app.post('/webhooks/xendit', async (request, reply) => {
const headerToken = request.headers['x-callback-token']
if (!verifyWebhookToken(headerToken)) {
request.log.warn('xendit webhook: bad token')
return reply.code(401).send({ error: 'invalid_token' })
}
// A missing body is treated as an empty object so the field reads below
// yield undefined instead of throwing.
const body = request.body ?? {}
const invoiceId = body.id
const paymentRequestId = body.external_id
const status = body.status
// Only a numeric amount is kept; anything else becomes null.
const amount = typeof body.amount === 'number' ? body.amount : null
const paymentMethod = body.payment_method ?? null
request.log.info(
{ paymentRequestId, invoiceId, status, amount, paymentMethod },
'xendit webhook received',
)
if (!paymentRequestId) {
// Forward-compat: future Xendit event types may not carry external_id
return reply.send({ ok: true, ignored: 'no_external_id' })
}
const existing = await getPayment(paymentRequestId)
if (!existing) {
// Unknown payment — could be stale orphan from a wiped dev DB. ACK so Xendit
// stops retrying; warn so we notice if this becomes common in prod.
request.log.warn({ paymentRequestId, invoiceId }, 'unknown payment_request — ACKing')
return reply.send({ ok: true, ignored: 'unknown_payment_request' })
}
if (status === 'PAID') {
// Defensive: amount mismatch = either tampering or config drift. Refuse to confirm.
if (amount !== null && amount !== existing.amount) {
request.log.error(
{ paymentRequestId, expected: existing.amount, got: amount },
'xendit webhook: amount mismatch',
)
return reply.code(409).send({ error: 'amount_mismatch' })
}
const headerToken = request.headers['x-callback-token']
// The token is only evaluated here; the rejection itself happens in STEP 2,
// after the survival log has been attempted.
const tokenValid = verifyWebhookToken(headerToken)
const sanitizedHeaders = sanitizeHeaders(request.headers)
// --- STEP 1: survival log ----------------------------------------------
// Insert before doing anything else. If this throws, attempt the optional
// rolling-file sink (env-gated, off by default). If that ALSO fails or is
// disabled, we silently continue — no stdout fallback by design, the
// operator's env config decides where survival writes go.
let logId = null
try {
logId = await insertWebhookLog({
provider: 'xendit',
headers: sanitizedHeaders,
rawBody: body,
callbackTokenValid: tokenValid,
})
} catch (dbErr) {
try {
await confirmPayment(paymentRequestId, { invoiceId, paymentMethod, amount })
} catch (err) {
// INVALID_STATE = already confirmed/consumed (Xendit retry); CONFLICT = race lost. ACK.
// EXPIRED = customer paid AFTER our sweeper expired the row — painful, manual recovery
// needed. Log loud so we notice. (D5 alignment should keep this rare.)
if (err.code === 'INVALID_STATE' || err.code === 'CONFLICT') {
request.log.info(
{ paymentRequestId, code: err.code, prevStatus: existing.status },
'xendit webhook: already in terminal state, ACKing',
)
} else if (err.code === 'EXPIRED') {
request.log.error(
{ paymentRequestId, expiredAt: existing.expires_at },
'xendit webhook: PAID after expiry — manual recovery needed',
)
await writeWebhookFallback({
provider: 'xendit',
headers: sanitizedHeaders,
rawBody: body,
callbackTokenValid: tokenValid,
dbErrorMessage: dbErr?.message,
})
} catch {
// Both sinks down. Nothing left to try — proceed without an audit
// trail. The operator chose this configuration; failure here is
// their disk/permission/config issue to monitor externally.
}
}
// --- STEP 2: process ---------------------------------------------------
// These defaults describe the "uncaught exception in processing" case. Each
// branch below overwrites them, and the outcome stamp in STEP 3 writes
// whatever they hold at that point into the log row.
let result = 'error'
let httpStatus = 500
let responseBody = { error: 'internal_error' }
let errorMsg = null
try {
if (!tokenValid) {
request.log.warn('xendit webhook: bad token')
result = 'rejected_invalid_token'
httpStatus = 401
responseBody = { error: 'invalid_token' }
} else {
const invoiceId = body.id
const paymentRequestId = body.external_id
const status = body.status
// Only a numeric amount is kept; anything else becomes null, which
// bypasses the mismatch check in the PAID branch.
const amount = typeof body.amount === 'number' ? body.amount : null
const paymentMethod = body.payment_method ?? null
request.log.info(
{ paymentRequestId, invoiceId, status, amount, paymentMethod },
'xendit webhook received',
)
if (!paymentRequestId) {
// Forward-compat: future Xendit event types may not carry external_id
result = 'ignored_no_external_id'
httpStatus = 200
responseBody = { ok: true, ignored: 'no_external_id' }
} else if (!UUID_RE.test(paymentRequestId)) {
// Not one of ours — see UUID_RE comment above.
request.log.warn({ paymentRequestId, invoiceId }, 'non-UUID external_id — ACKing')
result = 'ignored_non_uuid_external_id'
httpStatus = 200
responseBody = { ok: true, ignored: 'non_uuid_external_id' }
} else {
throw err
const existing = await getPayment(paymentRequestId)
if (!existing) {
// Unknown payment — could be stale orphan from a wiped dev DB. ACK so Xendit
// stops retrying; warn so we notice if this becomes common in prod.
request.log.warn({ paymentRequestId, invoiceId }, 'unknown payment_request — ACKing')
result = 'ignored_unknown_payment_request'
httpStatus = 200
responseBody = { ok: true, ignored: 'unknown_payment_request' }
} else if (status === 'PAID') {
// Defensive: amount mismatch = either tampering or config drift. Refuse to confirm.
// NOTE(review): the strict !== assumes existing.amount is a JS number like
// the parsed body amount — confirm against what getPayment() returns.
if (amount !== null && amount !== existing.amount) {
request.log.error(
{ paymentRequestId, expected: existing.amount, got: amount },
'xendit webhook: amount mismatch',
)
result = 'rejected_amount_mismatch'
httpStatus = 409
responseBody = { error: 'amount_mismatch' }
} else {
try {
await confirmPayment(paymentRequestId, { invoiceId, paymentMethod, amount })
result = 'confirmed'
} catch (err) {
// INVALID_STATE = already confirmed/consumed (Xendit retry); CONFLICT = race lost. ACK.
// EXPIRED = customer paid AFTER our sweeper expired the row — painful, manual
// recovery needed. Log loud so we notice. (D5 alignment should keep
// this rare.)
if (err.code === 'INVALID_STATE' || err.code === 'CONFLICT') {
request.log.info(
{ paymentRequestId, code: err.code, prevStatus: existing.status },
'xendit webhook: already in terminal state, ACKing',
)
result = `idempotent_${err.code.toLowerCase()}`
} else if (err.code === 'EXPIRED') {
request.log.error(
{ paymentRequestId, expiredAt: existing.expires_at },
'xendit webhook: PAID after expiry — manual recovery needed',
)
result = 'paid_after_expiry'
} else {
// Any other error code falls through to the outer catch (500).
throw err
}
}
// Reached for 'confirmed', 'idempotent_*' and 'paid_after_expiry' alike:
// all three ACK with 200.
httpStatus = 200
responseBody = { ok: true }
}
} else if (status === 'EXPIRED') {
await expirePayment(paymentRequestId)
result = 'expired'
httpStatus = 200
responseBody = { ok: true }
} else {
// NOTE(review): status is taken verbatim from the body, so a missing
// status is recorded as 'ignored_undefined' here.
result = `ignored_${status}`
httpStatus = 200
responseBody = { ok: true, ignored: status }
}
}
}
return reply.send({ ok: true })
} catch (err) {
// Anything that bubbles out of the processing block lands here. We do NOT
// re-throw — we log loudly and fall through with the 500 defaults, so
// Xendit retries on its own schedule. The forensic row records why.
errorMsg = err?.message ?? String(err)
request.log.error(
{ err: errorMsg, paymentRequestId: body.external_id },
'xendit webhook: unhandled exception during processing',
)
// Defaults already at result='error', httpStatus=500, responseBody=error
}
if (status === 'EXPIRED') {
await expirePayment(paymentRequestId)
return reply.send({ ok: true })
// --- STEP 3: outcome stamp + reply -------------------------------------
// Awaited so the log row is consistent before we hand control back to the
// framework. Wrapped in its own try/catch: a failed update must never
// prevent us from responding to Xendit.
// logId is still null when the STEP 1 insert threw, in which case there is
// no row to stamp.
if (logId) {
try {
await updateWebhookLog(logId, {
httpStatus,
processingResult: result,
processingError: errorMsg,
})
} catch (updErr) {
request.log.error({ err: updErr?.message, logId }, 'webhook_log update failed')
}
}
return reply.send({ ok: true, ignored: status })
return reply.code(httpStatus).send(responseBody)
})
}