feat: Node Health module — NC1 + TX1 thermal and system monitoring

- New route: /admin/node-health (30s auto-refresh)
- Temps via lm-sensors (k10temp + NVMe) displayed in both °C and °F
- RAM and disk progress bars with color thresholds
- Load averages, CPU %, uptime per node
- Nav item added under Operations
- lm-sensors installed on NC1 and TX1

Task #28 | Chronicler #88
This commit is contained in:
Claude Chronicler #88
2026-04-14 06:21:38 +00:00
parent c3af0d51e4
commit a01d7b9d7f
4 changed files with 576 additions and 0 deletions

View File

@@ -23,6 +23,7 @@ const aboutRouter = require('./about');
const mcpLogsRouter = require('./mcp-logs');
const tasksRouter = require('./tasks');
const forgeRouter = require('./forge');
const nodeHealthRouter = require('./node-health');
router.use(requireTrinityAccess);
@@ -133,5 +134,6 @@ router.use('/about', aboutRouter);
router.use('/mcp-logs', mcpLogsRouter);
router.use('/tasks', tasksRouter);
router.use('/forge', forgeRouter);
router.use('/node-health', nodeHealthRouter);
module.exports = router;

View File

@@ -0,0 +1,223 @@
const express = require('express');
const router = express.Router();
/**
* Node Health Module — Trinity Console
*
* Live thermal and system health monitoring for NC1 and TX1 game nodes.
* Pulls CPU temps, NVMe temps, RAM, disk, load, and uptime via Trinity Core.
*
* GET /admin/node-health — Main page
* GET /admin/node-health/data — JSON data endpoint (polled by the page's fetch-based auto-refresh)
*
* Chronicler #88 | April 14, 2026
*/
// Trinity Core connection settings. Environment variables take precedence so a
// deployment can point at another endpoint or rotate the credential without a code
// change; the literals remain as fallbacks so existing installs keep working.
const TRINITY_CORE_URL = process.env.TRINITY_CORE_URL || 'https://mcp.firefrostgaming.com';
// SECURITY: this fallback bearer token is committed to source control. Treat it as
// exposed — rotate it and supply the replacement via TRINITY_CORE_TOKEN instead.
const TRINITY_CORE_TOKEN = process.env.TRINITY_CORE_TOKEN || 'FFG-Trinity-2026-Core-Access';
// How long a fetched snapshot is served from memory before the nodes are queried again.
const CACHE_TTL = 30000; // 30 seconds
// Monitored nodes, keyed by the server id passed to Trinity Core's /exec endpoint.
// label/role/color are presentation metadata merged into each node's health payload.
const NODES = {
  'nc1-charlotte': { label: 'NC1 Charlotte', role: 'Game Node (Secondary)', color: '#4ECDC4' },
  'tx1-dallas': { label: 'TX1 Dallas', role: 'Game Node (Primary)', color: '#FF6B35' }
};
// In-memory snapshot cache: `data` is the last payload, `lastFetch` its Date.now() stamp.
let nodeCache = { data: null, lastFetch: 0 };
/**
 * Run a shell command on a node through Trinity Core's POST /exec endpoint.
 *
 * Failures are logged and reported as `null`, never thrown, so callers can treat
 * a missing result as "node unreachable".
 *
 * @param {string} server - Server id understood by Trinity Core (e.g. 'nc1-charlotte').
 * @param {string} command - Shell command to execute on that server.
 * @param {number} [timeoutMs=15000] - Abort the HTTP request after this many milliseconds.
 * @returns {Promise<string|null>} Command output (`output` or `stdout` field, '' if neither
 *   is set), or null on network error, timeout, or non-2xx status.
 */
async function trinityExec(server, command, timeoutMs = 15000) {
  try {
    const res = await fetch(`${TRINITY_CORE_URL}/exec`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${TRINITY_CORE_TOKEN}`
      },
      body: JSON.stringify({ server, command }),
      // Without a deadline a stalled Trinity Core would hang the page request forever.
      signal: AbortSignal.timeout(timeoutMs)
    });
    if (!res.ok) throw new Error(`Trinity Core error: ${res.status}`);
    const data = await res.json();
    return data.output || data.stdout || '';
  } catch (err) {
    console.error(`[NODE-HEALTH] trinityExec failed for ${server}:`, err.message);
    return null;
  }
}
/**
 * Convert a Celsius reading to Fahrenheit, rounded to the nearest whole degree.
 *
 * @param {number} c - Temperature in degrees Celsius.
 * @returns {number} Temperature in degrees Fahrenheit as an integer.
 */
function cToF(c) {
  const fahrenheit = (c * 9) / 5 + 32;
  return Math.round(fahrenheit);
}
/**
 * Extract temperatures from `sensors` (lm-sensors) text output.
 *
 * Every `label: value` temperature line inside a chip section whose header starts
 * with "k10temp" is collected under its label. The first "Composite" reading found
 * inside a section whose header starts with "nvme" is stored under the key "NVMe".
 * Sections are terminated by a blank line.
 *
 * @param {string|null|undefined} output - Raw stdout of the `sensors` command.
 * @returns {Object<string, number>|null} Map of label to degrees Celsius (CPU labels
 *   first, then "NVMe"); an empty object when nothing matched; null for empty input.
 */
function parseSensors(output) {
  if (!output) return null;

  // The unit is matched as "°C" or " C": lm-sensors falls back to a plain " C" when the
  // locale cannot represent the degree sign (e.g. LANG=C in a non-interactive shell).
  // A leading sign is accepted so sub-zero readings are not dropped. \u00b0 is "°".
  const cpuPattern = /^(\w+):\s+([+-]?\d+(?:\.\d+)?)\s*\u00b0?C\b/;
  const nvmePattern = /^Composite:\s+([+-]?\d+(?:\.\d+)?)\s*\u00b0?C\b/;

  const cpuTemps = {};
  let nvmeTemp = null;
  let inK10 = false;
  let inNvme = false;

  for (const line of output.split('\n')) {
    // A blank line closes whichever chip section is open.
    if (line.trim() === '') {
      inK10 = false;
      inNvme = false;
      continue;
    }
    if (line.startsWith('k10temp')) {
      inK10 = true;
      continue;
    }
    if (line.startsWith('nvme')) {
      inNvme = true;
      continue;
    }
    if (inK10) {
      const cpuMatch = line.match(cpuPattern);
      if (cpuMatch) cpuTemps[cpuMatch[1]] = parseFloat(cpuMatch[2]);
    }
    // Only the first NVMe composite reading is reported.
    if (inNvme && nvmeTemp === null) {
      const nvmeMatch = line.match(nvmePattern);
      if (nvmeMatch) nvmeTemp = parseFloat(nvmeMatch[1]);
    }
  }

  // NVMe is appended last so CPU rows always render first, whatever order the
  // chips appear in the sensors output.
  const temps = { ...cpuTemps };
  if (nvmeTemp !== null) temps['NVMe'] = nvmeTemp;
  return temps;
}
// Multipliers that convert a human-readable size suffix (as printed by `free -h`)
// to GiB. Only the first letter of the suffix is inspected, so "G" and "Gi" agree.
const SIZE_UNIT_TO_GIB = {
  B: 1 / (1024 ** 3),
  K: 1 / (1024 ** 2),
  M: 1 / 1024,
  G: 1,
  T: 1024,
  P: 1024 ** 2
};

// Convert a size token such as "512Mi", "31Gi" or "1.0Ti" to a number of GiB.
// A bare number is taken as GiB; anything unparseable (e.g. "?") yields 0.
function toGibibytes(val) {
  if (!val) return 0;
  const parts = /^(\d+(?:\.\d+)?)\s*([BKMGTP])?/i.exec(val);
  if (!parts) return 0;
  const factor = parts[2] ? SIZE_UNIT_TO_GIB[parts[2].toUpperCase()] : 1;
  return parseFloat(parts[1]) * factor;
}

/**
 * Build one node's health payload from raw command output.
 *
 * `statsOut` is expected to contain "=== RAM ===", "=== DISK ===", "=== UPTIME ==="
 * and "=== CPU_USAGE ===" marker lines, each followed by one line of output from
 * `free -h`, `df -h`, `uptime` and `top` respectively. Missing sections degrade to
 * '?' / 0 placeholders rather than failing.
 *
 * @param {string|null} sensorsOut - Raw `sensors` output; may be null/empty (no temps).
 * @param {string|null} statsOut - Raw marker-delimited stats output.
 * @returns {{temps: Object, ram: Object, disk: Object, uptime: string,
 *   load: number[], cpuPct: number, online: boolean}|null}
 *   The payload, or null when `statsOut` is empty (node treated as unreachable).
 */
function parseNodeData(sensorsOut, statsOut) {
  const temps = parseSensors(sensorsOut);
  if (!statsOut) return null;

  const lines = statsOut.split('\n');
  // Return the trimmed line that follows the first line containing `marker`, or ''.
  const lineAfter = (marker) => {
    const idx = lines.findIndex((l) => l.includes(marker));
    return idx >= 0 && idx + 1 < lines.length ? lines[idx + 1].trim() : '';
  };

  // RAM — `free -h` row: "Mem: <total> <used> ..."
  const memParts = lineAfter('=== RAM ===').split(/\s+/);
  const totalRam = memParts[1] || '?';
  const usedRam = memParts[2] || '?';
  // Both values are normalised to GiB first; total and used can carry different
  // suffixes (e.g. 1.0Ti total vs 512Gi used), which a raw ratio would get wrong.
  const totalGi = toGibibytes(totalRam);
  const ramPct = totalGi > 0 ? Math.round((toGibibytes(usedRam) / totalGi) * 100) : 0;

  // Disk — `df -h` row: "<fs> <size> <used> <avail> <use%> <mount>"
  const diskParts = lineAfter('=== DISK ===').split(/\s+/);
  const totalDisk = diskParts[1] || '?';
  const usedDisk = diskParts[2] || '?';
  const diskPct = Number.parseInt(diskParts[4], 10) || 0;

  // Uptime & load — both come from the single `uptime` line.
  const uptimeLine = lineAfter('=== UPTIME ===');
  const uptimeMatch = uptimeLine.match(/up\s+(.+?),\s+\d+\s+user/);
  const uptime = uptimeMatch ? uptimeMatch[1].trim() : '?';
  const loadMatch = uptimeLine.match(/load average:\s*([\d.]+),\s*([\d.]+),\s*([\d.]+)/);
  const load = loadMatch
    ? [parseFloat(loadMatch[1]), parseFloat(loadMatch[2]), parseFloat(loadMatch[3])]
    : [0, 0, 0];

  // CPU % — derived as 100 minus the idle percentage reported by `top`.
  const idleMatch = lineAfter('=== CPU_USAGE ===').match(/(\d+\.\d+)\s+id/);
  const cpuPct = idleMatch ? Math.round(100 - parseFloat(idleMatch[1])) : 0;

  // Pair every Celsius reading with its Fahrenheit equivalent for display.
  const tempDisplay = {};
  if (temps) {
    for (const [key, c] of Object.entries(temps)) {
      tempDisplay[key] = { c, f: cToF(c) };
    }
  }

  return {
    temps: tempDisplay,
    ram: { total: totalRam, used: usedRam, pct: ramPct },
    disk: { total: totalDisk, used: usedDisk, pct: diskPct },
    uptime,
    load,
    cpuPct,
    online: true
  };
}
/**
 * Collect health data for every node listed in NODES, served from the in-memory
 * cache while the last snapshot is younger than CACHE_TTL.
 *
 * For each node two Trinity Core commands run in parallel: `sensors` and a combined
 * stats command. A node whose stats could not be parsed is reported with
 * `online: false` alongside its static NODES metadata.
 *
 * @returns {Promise<Object>} Map of node id to merged metadata + health payload,
 *   plus a `fetchedAt` ISO-8601 timestamp.
 */
async function fetchNodeHealth() {
  const now = Date.now();
  if (nodeCache.data && (now - nodeCache.lastFetch < CACHE_TTL)) {
    return nodeCache.data;
  }

  console.log('[NODE-HEALTH] Fetching node health data...');

  // Each section is announced by a marker line so parseNodeData can locate it.
  const statsCommand = [
    'echo "=== RAM ===" && free -h | grep Mem',
    'echo "=== DISK ===" && df -h / | tail -1',
    'echo "=== UPTIME ===" && uptime',
    'echo "=== CPU_USAGE ===" && top -bn1 | grep "Cpu(s)"'
  ].join(' && ');

  // Driven from NODES so adding a node there is enough to have it queried.
  const entries = await Promise.all(
    Object.keys(NODES).map(async (id) => {
      const [sensorsOut, statsOut] = await Promise.all([
        trinityExec(id, 'sensors'),
        trinityExec(id, statsCommand)
      ]);
      const health = parseNodeData(sensorsOut, statsOut) || { online: false };
      return [id, { ...NODES[id], ...health }];
    })
  );

  const data = {
    ...Object.fromEntries(entries),
    fetchedAt: new Date().toISOString()
  };

  nodeCache = { data, lastFetch: now };
  console.log('[NODE-HEALTH] Fetch complete.');
  return data;
}
// GET /admin/node-health
// Renders the dashboard view with the current node snapshot. If gathering the
// snapshot throws, the same view is rendered with `nodes: null` and the error text.
router.get('/', async (req, res) => {
  const view = 'admin/node-health/index';
  // Locals shared by the success and failure renders; `extra` carries the rest.
  const pageLocals = (extra) => ({
    title: 'Node Health',
    currentPath: '/node-health',
    ...extra,
    adminUser: req.user,
    layout: 'layout'
  });

  try {
    const nodes = await fetchNodeHealth();
    res.render(view, pageLocals({ nodes }));
  } catch (err) {
    console.error('[NODE-HEALTH] Route error:', err);
    res.render(view, pageLocals({ nodes: null, error: err.message }));
  }
});
// GET /admin/node-health/data — JSON for auto-refresh
// Always bypasses the cache: the snapshot is discarded before fetching so the
// caller receives freshly gathered data.
router.get('/data', async (req, res) => {
  nodeCache = { data: null, lastFetch: 0 }; // force fresh
  try {
    const snapshot = await fetchNodeHealth();
    res.json({ success: true, nodes: snapshot });
  } catch (failure) {
    const body = { success: false, error: failure.message };
    res.status(500).json(body);
  }
});
module.exports = router;

View File

@@ -0,0 +1,348 @@
<!-- Node Health Module — Trinity Console -->
<!-- Chronicler #88 | April 14, 2026 -->
<style>
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600;700&display=swap');
#node-health { font-family: 'JetBrains Mono', 'SF Mono', monospace; color: #e0e0e0; }
#node-health * { box-sizing: border-box; }
.nh-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
margin-top: 16px;
}
@media (max-width: 900px) {
.nh-grid { grid-template-columns: 1fr; }
}
.nh-node-card {
background: #2d2d2d;
border: 1px solid #404040;
border-radius: 10px;
overflow: hidden;
}
.nh-node-header {
padding: 14px 18px;
display: flex;
align-items: center;
gap: 10px;
border-bottom: 1px solid #404040;
}
.nh-node-dot {
width: 10px; height: 10px;
border-radius: 50%;
flex-shrink: 0;
}
.nh-node-title { font-size: 15px; font-weight: 700; }
.nh-node-role { font-size: 10px; color: #888; margin-left: auto; }
.nh-body { padding: 16px 18px; }
.nh-section-title {
font-size: 9px;
font-weight: 700;
color: #666;
text-transform: uppercase;
letter-spacing: 0.1em;
margin: 14px 0 8px;
}
.nh-section-title:first-child { margin-top: 0; }
/* Stat row */
.nh-stat-row {
display: flex;
justify-content: space-between;
align-items: center;
padding: 5px 0;
border-bottom: 1px solid #333;
font-size: 12px;
}
.nh-stat-row:last-child { border-bottom: none; }
.nh-stat-label { color: #888; }
.nh-stat-value { font-weight: 600; color: #e0e0e0; text-align: right; }
/* Temp row — shows both C and F */
.nh-temp-row {
display: flex;
justify-content: space-between;
align-items: center;
padding: 5px 0;
border-bottom: 1px solid #333;
font-size: 12px;
}
.nh-temp-row:last-child { border-bottom: none; }
.nh-temp-label { color: #888; }
.nh-temp-values { display: flex; gap: 10px; align-items: center; }
.nh-temp-c { font-weight: 700; }
.nh-temp-f { color: #777; font-size: 11px; }
/* Color thresholds */
.temp-ok { color: #4ade80; }
.temp-warm { color: #facc15; }
.temp-hot { color: #f97316; }
.temp-crit { color: #ef4444; }
/* Progress bars */
.nh-bar-wrap {
background: #1a1a1a;
border-radius: 4px;
height: 6px;
margin-top: 4px;
overflow: hidden;
}
.nh-bar {
height: 100%;
border-radius: 4px;
transition: width 0.4s ease;
}
.bar-ok { background: #4ade80; }
.bar-warn { background: #facc15; }
.bar-crit { background: #ef4444; }
.nh-bar-label {
display: flex;
justify-content: space-between;
font-size: 11px;
color: #888;
margin-top: 3px;
}
/* Load averages */
.nh-load-row {
display: flex;
gap: 10px;
}
.nh-load-pill {
flex: 1;
background: #1a1a1a;
border-radius: 6px;
padding: 8px;
text-align: center;
}
.nh-load-val { font-size: 16px; font-weight: 700; color: #e0e0e0; }
.nh-load-lbl { font-size: 9px; color: #666; margin-top: 2px; }
/* Uptime / CPU row */
.nh-info-row {
display: flex;
gap: 10px;
margin-bottom: 4px;
}
.nh-info-pill {
flex: 1;
background: #1a1a1a;
border-radius: 6px;
padding: 8px 10px;
}
.nh-info-pill .val { font-size: 13px; font-weight: 700; }
.nh-info-pill .lbl { font-size: 9px; color: #666; margin-top: 2px; }
/* Offline state */
.nh-offline {
text-align: center;
padding: 40px;
color: #ef4444;
font-size: 13px;
}
/* Header row */
.nh-header-row {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 4px;
}
.nh-last-updated {
font-size: 10px;
color: #555;
}
.nh-refresh-btn {
background: #333;
border: 1px solid #555;
border-radius: 6px;
color: #aaa;
font-family: inherit;
font-size: 11px;
padding: 5px 12px;
cursor: pointer;
transition: background 0.2s;
}
.nh-refresh-btn:hover { background: #444; color: #fff; }
.nh-refresh-btn:disabled { opacity: 0.5; cursor: not-allowed; }
.nh-spinner { display: none; }
.nh-refreshing .nh-spinner { display: inline; }
.nh-refreshing .nh-btn-text { display: none; }
</style>
<div id="node-health">
<div class="nh-header-row">
<div>
<h2 style="margin:0; font-size:18px; font-weight:700;">Node Health</h2>
<div class="nh-last-updated" id="nh-last-updated">
<% if (nodes) { %>Last updated: <%= new Date(nodes.fetchedAt).toLocaleTimeString() %><% } %>
</div>
</div>
<button class="nh-refresh-btn" id="nh-refresh-btn" onclick="refreshNodeHealth()">
<span class="nh-btn-text">↻ Refresh</span>
<span class="nh-spinner">Refreshing…</span>
</button>
</div>
<div class="nh-grid" id="nh-grid">
<% if (!nodes) { %>
<div style="grid-column:1/-1; color:#ef4444; padding:20px;">Failed to load node data. <%= error || '' %></div>
<% } else { %>
<% const nodeIds = ['nc1-charlotte', 'tx1-dallas']; %>
<% nodeIds.forEach(function(id) { %>
<% const node = nodes[id]; %>
<%- include('_node_card', { node, id }) %>
<% }); %>
<% } %>
</div>
</div>
<script>
// Poll for fresh node data on a fixed 30-second cadence; the handle is kept so the
// polling can be cancelled with clearInterval(refreshTimer).
let refreshTimer = setInterval(refreshNodeHealth, 30 * 1000);
/**
 * Fetch the latest node snapshot from /admin/node-health/data and re-render the grid.
 *
 * The refresh button is disabled and marked `nh-refreshing` while a request is in
 * flight. A call made during that window (for example the 30-second timer firing
 * during a manual refresh) is skipped, so the button is never re-enabled while
 * another request is still pending. Failures are logged to the console and leave
 * the currently displayed data untouched.
 *
 * @returns {Promise<void>}
 */
async function refreshNodeHealth() {
  const btn = document.getElementById('nh-refresh-btn');
  // The disabled flag doubles as the "request in flight" marker.
  if (btn.disabled) return;
  btn.disabled = true;
  btn.classList.add('nh-refreshing');
  try {
    const res = await fetch('/admin/node-health/data');
    const json = await res.json();
    if (json.success) {
      renderNodes(json.nodes);
      document.getElementById('nh-last-updated').textContent =
        'Last updated: ' + new Date(json.nodes.fetchedAt).toLocaleTimeString();
    } else {
      // The endpoint answers { success: false, error } on failure; surface it
      // instead of silently keeping stale data on screen.
      console.error('Node health refresh failed:', json.error || `HTTP ${res.status}`);
    }
  } catch (err) {
    console.error('Node health refresh failed:', err);
  } finally {
    btn.disabled = false;
    btn.classList.remove('nh-refreshing');
  }
}
/**
 * Map a Celsius reading to the CSS class that colours it.
 *
 * @param {number} c - Temperature in degrees Celsius.
 * @returns {string} 'temp-crit' (>= 85), 'temp-hot' (>= 70), 'temp-warm' (>= 55),
 *   otherwise 'temp-ok'.
 */
function tempClass(c) {
  // Ordered hottest-first; the first floor the reading reaches wins.
  const bands = [
    [85, 'temp-crit'],
    [70, 'temp-hot'],
    [55, 'temp-warm']
  ];
  for (const [floor, cssClass] of bands) {
    if (c >= floor) return cssClass;
  }
  return 'temp-ok';
}
/**
 * Map a usage percentage to the CSS class that colours its progress bar.
 *
 * @param {number} pct - Usage percentage.
 * @returns {string} 'bar-crit' (>= 85), 'bar-warn' (>= 70), otherwise 'bar-ok'.
 */
function barClass(pct) {
  let cssClass = 'bar-ok';
  if (pct >= 70) cssClass = 'bar-warn';
  if (pct >= 85) cssClass = 'bar-crit';
  return cssClass;
}
/**
 * Replace the contents of the #nh-grid element with one card per known node.
 *
 * @param {Object} nodes - Snapshot keyed by node id, as returned by the /data endpoint.
 */
function renderNodes(nodes) {
  const cardOrder = ['nc1-charlotte', 'tx1-dallas'];
  const markup = cardOrder
    .map((nodeId) => renderCard(nodeId, nodes[nodeId]))
    .join('');
  document.getElementById('nh-grid').innerHTML = markup;
}
// Escape a value for safe interpolation into HTML text or a double-quoted attribute.
function nhEscapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Build the HTML for a single node card.
 *
 * Offline nodes get a compact "unreachable" card; online nodes get uptime, CPU,
 * load averages, memory and disk bars, and one row per temperature reading.
 * Every interpolated value is HTML-escaped: several of them (uptime, sizes, sensor
 * labels) are parsed from command output on the remote node and the result is
 * assigned to innerHTML by the caller.
 *
 * @param {string} id - Node id (currently not used in the markup).
 * @param {Object} n - Node payload: label, role, color, online, and when online
 *   temps, ram, disk, uptime, load (three numbers) and cpuPct.
 * @returns {string} HTML string for the card.
 */
function renderCard(id, n) {
  const esc = nhEscapeHtml;

  if (!n.online) {
    return `<div class="nh-node-card">
  <div class="nh-node-header">
    <div class="nh-node-dot" style="background:#ef4444;"></div>
    <span class="nh-node-title">${esc(n.label)}</span>
    <span class="nh-node-role">${esc(n.role)}</span>
  </div>
  <div class="nh-offline">⚠ Node unreachable</div>
</div>`;
  }

  // Temps — one row per reading, Celsius coloured by threshold, Fahrenheit muted.
  let tempRows = '';
  if (n.temps) {
    for (const [key, t] of Object.entries(n.temps)) {
      const cls = tempClass(t.c);
      tempRows += `<div class="nh-temp-row">
  <span class="nh-temp-label">${esc(key)}</span>
  <div class="nh-temp-values">
    <span class="nh-temp-c ${cls}">${esc(t.c)}°C</span>
    <span class="nh-temp-f">${esc(t.f)}°F</span>
  </div>
</div>`;
    }
  }

  // Bar colours follow the usage thresholds in barClass.
  const ramBar = barClass(n.ram.pct);
  const diskBar = barClass(n.disk.pct);

  return `<div class="nh-node-card">
  <div class="nh-node-header">
    <div class="nh-node-dot" style="background:${esc(n.color)};"></div>
    <span class="nh-node-title">${esc(n.label)}</span>
    <span class="nh-node-role">${esc(n.role)}</span>
  </div>
  <div class="nh-body">
    <div class="nh-info-row">
      <div class="nh-info-pill">
        <div class="val">${esc(n.uptime)}</div>
        <div class="lbl">UPTIME</div>
      </div>
      <div class="nh-info-pill">
        <div class="val">${esc(n.cpuPct)}%</div>
        <div class="lbl">CPU USAGE</div>
      </div>
    </div>
    <div class="nh-section-title">Load Average</div>
    <div class="nh-load-row">
      <div class="nh-load-pill">
        <div class="nh-load-val">${n.load[0].toFixed(2)}</div>
        <div class="nh-load-lbl">1 MIN</div>
      </div>
      <div class="nh-load-pill">
        <div class="nh-load-val">${n.load[1].toFixed(2)}</div>
        <div class="nh-load-lbl">5 MIN</div>
      </div>
      <div class="nh-load-pill">
        <div class="nh-load-val">${n.load[2].toFixed(2)}</div>
        <div class="nh-load-lbl">15 MIN</div>
      </div>
    </div>
    <div class="nh-section-title">Memory</div>
    <div class="nh-bar-wrap">
      <div class="nh-bar ${ramBar}" style="width:${esc(n.ram.pct)}%"></div>
    </div>
    <div class="nh-bar-label">
      <span>${esc(n.ram.used)} used</span>
      <span>${esc(n.ram.pct)}% of ${esc(n.ram.total)}</span>
    </div>
    <div class="nh-section-title">Disk</div>
    <div class="nh-bar-wrap">
      <div class="nh-bar ${diskBar}" style="width:${esc(n.disk.pct)}%"></div>
    </div>
    <div class="nh-bar-label">
      <span>${esc(n.disk.used)} used</span>
      <span>${esc(n.disk.pct)}% of ${esc(n.disk.total)}</span>
    </div>
    <div class="nh-section-title">Temperatures</div>
    ${tempRows || '<div style="color:#666;font-size:11px;">No sensor data</div>'}
  </div>
</div>`;
}
</script>

View File

@@ -141,6 +141,9 @@
<a href="/admin/infrastructure" class="block px-4 py-2 rounded-md <%= currentPath.startsWith('/infrastructure') ? 'bg-gray-200 dark:bg-gray-700' : 'hover:bg-gray-100 dark:hover:bg-gray-800' %>">
🌐 Infrastructure
</a>
<a href="/admin/node-health" class="block px-4 py-2 rounded-md <%= currentPath.startsWith('/node-health') ? 'bg-gray-200 dark:bg-gray-700' : 'hover:bg-gray-100 dark:hover:bg-gray-800' %>">
🌡️ Node Health
</a>
<a href="/admin/scheduler" class="block px-4 py-2 rounded-md <%= currentPath.startsWith('/scheduler') ? 'bg-gray-200 dark:bg-gray-700' : 'hover:bg-gray-100 dark:hover:bg-gray-800' %>">
⏰ Scheduler
</a>