135 lines
5.4 KiB
TypeScript
135 lines
5.4 KiB
TypeScript
import { env } from '../config/env';
|
||
import { vaultAppRoleLogin } from '../config/vault';
|
||
import { fetchJwtKeysFromVault, swapKeyMap, getKeyMapSize } from './jwt.service';
|
||
import { fetchCsrfConfig, swapCsrfConfig } from './csrf.service';
|
||
import { fetchMasterKey, swapMasterKey, masterKeyMatches, isCryptoReady } from './crypto.service';
|
||
import { logger } from '../lib/logger';
|
||
|
||
const DEFAULT_INTERVAL_MS = 60 * 60 * 1000; // 1 hour
|
||
|
||
let timer: NodeJS.Timeout | null = null;
|
||
|
||
// Inflight guard — reentrant calls share the same promise (audit#4 C2/C3).
|
||
// Без этого две параллельные refreshAllKeys могут torn-state'ить keyMap/csrf/crypto.
|
||
let inflight: Promise<RefreshResult> | null = null;
|
||
|
||
// H15/H18 — track refresh outcomes для health endpoint + alarm.
|
||
export interface RefreshResult {
|
||
ok: boolean;
|
||
reason?: string;
|
||
timestamp: number;
|
||
}
|
||
|
||
let lastRefresh: RefreshResult = { ok: false, reason: 'not_yet_run', timestamp: 0 };
|
||
let consecutiveFailures = 0;
|
||
|
||
export function getLastRefreshResult(): RefreshResult {
|
||
return lastRefresh;
|
||
}
|
||
export function getConsecutiveFailures(): number {
|
||
return consecutiveFailures;
|
||
}
|
||
|
||
/**
|
||
* Atomic refresh: pre-fetch JWT/CSRF/crypto secrets, swap globals только если необходимые получены.
|
||
* Reentrant-safe.
|
||
* H18 — returns RefreshResult так что caller знает реально ли refresh succeeded.
|
||
*/
|
||
export async function refreshAllKeys(): Promise<RefreshResult> {
|
||
if (inflight) return inflight;
|
||
inflight = doRefresh().finally(() => {
|
||
inflight = null;
|
||
});
|
||
return inflight;
|
||
}
|
||
|
||
async function doRefresh(): Promise<RefreshResult> {
|
||
const { addr, roleId, secretId, mount, jwtKidPath, jwtKidsPrefix, csrfPath, cryptoKeyPath } = env.vault;
|
||
const fail = (reason: string): RefreshResult => {
|
||
consecutiveFailures += 1;
|
||
lastRefresh = { ok: false, reason, timestamp: Date.now() };
|
||
logger.error(`Vault refresh failed: ${reason} (consecutive=${consecutiveFailures})`);
|
||
return lastRefresh;
|
||
};
|
||
|
||
if (!addr || !roleId || !secretId) {
|
||
return fail('vault_not_configured');
|
||
}
|
||
|
||
// КАЖДЫЙ refresh — свежий AppRole login. Vault token TTL обычно ≤1 час.
|
||
const token = await vaultAppRoleLogin(addr, roleId, secretId);
|
||
if (!token) {
|
||
return fail('approle_login_failed');
|
||
}
|
||
|
||
const jwtPromise = fetchJwtKeysFromVault(addr, token, mount, jwtKidPath, jwtKidsPrefix);
|
||
const csrfPromise = csrfPath ? fetchCsrfConfig(addr, token, mount, csrfPath) : Promise.resolve(null);
|
||
const cryptoPromise = cryptoKeyPath ? fetchMasterKey(addr, token, mount, cryptoKeyPath) : Promise.resolve(null);
|
||
|
||
const [jwtResult, csrfResult, cryptoResult] = await Promise.allSettled([jwtPromise, csrfPromise, cryptoPromise]);
|
||
|
||
if (jwtResult.status === 'rejected') {
|
||
return fail(`jwt_fetch_failed: ${jwtResult.reason?.message || jwtResult.reason}`);
|
||
}
|
||
if (csrfPath && csrfResult.status === 'rejected') {
|
||
return fail(`csrf_fetch_failed: ${csrfResult.reason?.message || csrfResult.reason}`);
|
||
}
|
||
// Master-key: первый load обязателен, дальнейшие failures толерантны.
|
||
if (cryptoKeyPath && !isCryptoReady() && cryptoResult.status === 'rejected') {
|
||
return fail(`crypto_fetch_failed: ${cryptoResult.reason?.message || cryptoResult.reason}`);
|
||
}
|
||
|
||
// Atomic swap. JS single-threaded → observers видят либо все старые, либо все новые.
|
||
swapKeyMap(jwtResult.value);
|
||
if (csrfResult.status === 'fulfilled' && csrfResult.value) {
|
||
swapCsrfConfig(csrfResult.value);
|
||
}
|
||
if (cryptoResult.status === 'fulfilled' && cryptoResult.value) {
|
||
if (!isCryptoReady()) {
|
||
swapMasterKey(cryptoResult.value);
|
||
logger.info('Crypto master key loaded');
|
||
} else if (!masterKeyMatches(cryptoResult.value)) {
|
||
// H16 — master-key drift detected. По умолчанию FATAL: если operator не выставил
|
||
// ALLOW_MASTER_KEY_ROTATION=true explicitly, мы НЕ продолжаем silently на старом key.
|
||
const allowRotation = process.env.ALLOW_MASTER_KEY_ROTATION === 'true';
|
||
const msg = 'Vault crypto/master key DIFFERS from in-memory key. ALL existing encrypted_mnemonic will become undecryptable.';
|
||
if (allowRotation) {
|
||
logger.warn(msg + ' (continuing because ALLOW_MASTER_KEY_ROTATION=true)');
|
||
} else {
|
||
logger.error(msg + ' Set ALLOW_MASTER_KEY_ROTATION=true to acknowledge migration intent. FATAL — service will exit.');
|
||
// Defer exit so rest of refresh logs flush
|
||
setImmediate(() => process.exit(1));
|
||
return fail('master_key_drift');
|
||
}
|
||
}
|
||
}
|
||
|
||
consecutiveFailures = 0;
|
||
lastRefresh = { ok: true, timestamp: Date.now() };
|
||
logger.info(
|
||
`Keys refreshed atomically: JWT keys=${getKeyMapSize()}` +
|
||
(csrfPath ? `, CSRF=${csrfResult.status === 'fulfilled' ? 'updated' : 'unchanged'}` : '') +
|
||
`, Crypto=${isCryptoReady() ? 'ready' : 'NOT-READY'}`
|
||
);
|
||
return lastRefresh;
|
||
}
|
||
|
||
export function startKeyRotation(intervalMs: number = DEFAULT_INTERVAL_MS): void {
|
||
if (timer) return;
|
||
timer = setInterval(() => {
|
||
logger.info('Refreshing keys from Vault...');
|
||
void refreshAllKeys().catch((err) =>
|
||
logger.error(`Key rotation tick failed: ${err?.message || err}`)
|
||
);
|
||
}, intervalMs);
|
||
logger.info(`Key rotation scheduled (every ${intervalMs}ms)`);
|
||
}
|
||
|
||
export function stopKeyRotation(): void {
|
||
if (timer) {
|
||
clearInterval(timer);
|
||
timer = null;
|
||
logger.info('Key rotation stopped');
|
||
}
|
||
}
|