-
-
Save yekver/34c9d41c1c4ea478151574ea539e9953 to your computer and use it in GitHub Desktop.
| const prom = require('prom-client'); | |
| const pm2 = require('pm2'); | |
// Shared pm2 event bus. It starts out unset and is assigned from
// pm2exec('launchBus') the first time getAggregatedRegistry needs it.
| let pm2Bus; | |
// Topic used to mark a process message as a request for an instance's
// Prometheus register (sent by requestNeighboursData, checked by the
// process 'message' listener).
| const REQ_TOPIC = 'get_prom_register'; | |
/**
 * Promise adapter for the callback-style pm2 API.
 *
 * Invokes `pm2[cmd]` with `args` followed by a Node-style `(err, resp)`
 * callback.
 *
 * @param {string} cmd - Name of the pm2 method to call.
 * @param {...*} args - Arguments passed to the method ahead of the callback.
 * @returns {Promise<*>} Resolves with the callback's `resp`; rejects with
 *   `err` when it is truthy.
 */
function pm2exec(cmd, ...args) {
  return new Promise((resolve, reject) => {
    const onDone = (err, resp) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(resp);
    };

    pm2[cmd](...args, onDone);
  });
}
/**
 * Keeps only the process descriptors whose `pm2_env.status` is 'online'.
 *
 * @param {Array<{pm2_env: {status: string}}>} instancesData - Process descriptors.
 * @returns {Array<Object>} New array holding the online descriptors, in their
 *   original order.
 */
function getOnlineInstances(instancesData) {
  const isOnline = instance => instance.pm2_env.status === 'online';
  return instancesData.filter(isOnline);
}
/**
 * Builds a fresh registry with one gauge per pm2 process metric and fills it
 * from the given process descriptors.
 *
 * Every gauge is labelled with the process `name` and its pm2 id
 * (`instance`). Monitor readings that are missing or not numeric are left
 * unset; a reading of 0 is a valid value and is recorded.
 *
 * @param {Array<Object>} instancesData - Process descriptors; each one is
 *   read for `name`, `pm2_env` and `monit`.
 * @returns {Object} The newly created `prom.Registry`.
 */
function getMainMetricsRegister(instancesData) {
  // don't use prom.register here because these metrics
  // will be summed in cluster mode!
  const registry = new prom.Registry();

  const metricDefinitions = [
    { name: 'up', help: 'Is the process running' },
    { name: 'cpu', help: 'Process cpu usage' },
    { name: 'memory', help: 'Process memory usage' },
    { name: 'heap_size', help: 'Process heap size' },
    { name: 'used_heap_size', help: 'Process heap usage' },
    { name: 'uptime', help: 'Process uptime' },
    { name: 'instances', help: 'Process instances' },
    { name: 'restarts', help: 'Process restarts' },
    { name: 'loop_delay', help: 'Event Loop Latency' },
    { name: 'loop_delay_p95', help: 'Event Loop Latency p95' },
  ];

  const mainMetrics = metricDefinitions.reduce((acc, { name, help }) => {
    acc[name] = new prom.Gauge({
      name,
      help,
      labelNames: ['name', 'instance'],
      registers: [registry],
    });
    return acc;
  }, {});

  // Reads one numeric entry of `pm2_env.axm_monitor`. Returns null (meaning
  // "do not set the gauge") when the monitor object or the entry is missing or
  // the value is not a number. A plain `parseFloat(x) || null` would also
  // discard a real reading of 0.
  const readAxmValue = (axm, key) => {
    const entry = axm ? axm[key] : undefined;
    if (!entry) {
      return null;
    }
    const parsed = parseFloat(entry.value);
    return Number.isNaN(parsed) ? null : parsed;
  };

  instancesData.forEach(({ name, pm2_env, monit }) => {
    const labels = {
      name,
      instance: pm2_env.pm_id,
    };
    const axm = pm2_env.axm_monitor;
    const values = {
      up: pm2_env.status === 'online' ? 1 : 0,
      cpu: monit.cpu,
      memory: monit.memory,
      heap_size: readAxmValue(axm, 'Heap Size'),
      used_heap_size: readAxmValue(axm, 'Used Heap Size'),
      // pm_uptime is subtracted from the current time and converted ms -> s
      uptime: Math.round((Date.now() - pm2_env.pm_uptime) / 1000),
      instances: pm2_env.instances || 1,
      restarts: pm2_env.unstable_restarts + pm2_env.restart_time,
      loop_delay: readAxmValue(axm, 'Event Loop Latency'),
      loop_delay_p95: readAxmValue(axm, 'Event Loop Latency p95'),
    };

    Object.entries(values).forEach(([metricName, value]) => {
      if (value !== null) {
        mainMetrics[metricName].set(labels, value);
      }
    });
  });

  return registry;
}
/**
 * Asks every other instance to send its metrics to the current one.
 *
 * A REQ_TOPIC packet carrying this process's id (`process.env.pm_id`) is sent
 * to each entry whose `pm_id` differs from it. When a send fails, the shared
 * `instancesToWait.count` is decremented and the failure is logged.
 *
 * @param {Array<{pm_id: number}>} instancesData - Instances to contact.
 * @param {{count: number}} instancesToWait - Mutable counter of expected responders.
 * @returns {void}
 */
function requestNeighboursData(instancesData, instancesToWait) {
  const targetInstanceId = Number(process.env.pm_id);
  const requestPacket = { topic: REQ_TOPIC, data: { targetInstanceId } };

  for (const { pm_id } of Object.values(instancesData)) {
    if (pm_id === targetInstanceId) {
      continue;
    }

    pm2exec('sendDataToProcessId', pm_id, requestPacket).catch(e => {
      instancesToWait.count--;
      console.error(`Failed to request metrics from instance #${pm_id}: ${e.message}`);
    });
  }
}
/**
 * Merges the pm2 process gauges built from `instancesData` with the metrics
 * held in the default `prom.register`.
 *
 * @param {Array<Object>} instancesData - Process descriptors passed to
 *   getMainMetricsRegister.
 * @returns {Object} Result of `prom.AggregatorRegistry.aggregate`.
 */
function getCurrentRegistry(instancesData) {
  const pm2Metrics = getMainMetricsRegister(instancesData).getMetricsAsJSON();
  const ownMetrics = prom.register.getMetricsAsJSON();

  return prom.AggregatorRegistry.aggregate([pm2Metrics, ownMetrics]);
}
/**
 * Collects the metrics of all online instances and aggregates them.
 *
 * This instance's own metrics are stored first. Then a listener for
 * `process:<own id>` events is attached to the pm2 bus, the other online
 * instances are asked for their registers, and every reply is stored under
 * the replying instance's id. The promise resolves as soon as every expected
 * instance has answered, or when no reply arrives within
 * `noResponseTimeoutMs` (the timer restarts after each reply); in the latter
 * case whatever has been collected so far is aggregated.
 *
 * @param {Array<Object>} instancesData - Process descriptors of all instances.
 * @param {number} [noResponseTimeoutMs=1000] - Maximum wait for the next reply.
 * @returns {Promise<Object>} Result of `prom.AggregatorRegistry.aggregate`.
 *   Rejects if the own metrics cannot be read or the bus cannot be launched.
 */
async function getAggregatedRegistry(instancesData, noResponseTimeoutMs = 1000) {
  const onlineInstances = getOnlineInstances(instancesData);
  // Shared with requestNeighboursData, which lowers `count` when a request
  // could not be sent.
  const instancesToWait = { count: onlineInstances.length };
  const instanceId = Number(process.env.pm_id);
  const eventName = `process:${instanceId}`;

  // Indexed by instance id; the current instance contributes its own data.
  const registersList = [];
  registersList[instanceId] = getCurrentRegistry(instancesData).getMetricsAsJSON();

  // The bus has to exist BEFORE the listener is attached and the requests are
  // sent. Awaiting it here (instead of inside the Promise executor) guarantees
  // that ordering on the very first call as well.
  if (!pm2Bus) {
    pm2Bus = await pm2exec('launchBus');
  }

  const registryPromise = new Promise(resolve => {
    // Starts at 1 because the current instance's metrics are already stored.
    let responsesCount = 1;
    let timeoutId;

    function sendResult() {
      pm2Bus.off(eventName);
      resolve(prom.AggregatorRegistry.aggregate(registersList));
    }

    function kickNoResponseTimeout() {
      timeoutId = setTimeout(() => {
        console.warn(
          `Metrics were sent by timeout. No response from ${instancesToWait.count -
            responsesCount} instances.`,
        );
        sendResult();
      }, noResponseTimeoutMs);
    }

    kickNoResponseTimeout();

    pm2Bus.on(eventName, packet => {
      registersList[packet.data.instanceId] = packet.data.register;
      responsesCount++;
      clearTimeout(timeoutId);

      // `>=` rather than `===`: the expected count can shrink while replies
      // are still coming in.
      if (responsesCount >= instancesToWait.count) {
        sendResult();
      } else {
        kickNoResponseTimeout();
      }
    });
  });

  // Requests are sent only now, after the listener above has been attached.
  requestNeighboursData(onlineInstances, instancesToWait);

  return registryPromise;
}
// Listener: replies to metric requests coming from another instance.
// Only packets whose topic equals REQ_TOPIC are handled; the reply is
// addressed to `process:<targetInstanceId>` and carries this process's id
// together with the JSON form of its default prom register.
process.on('message', packet => {
  if (packet.topic !== REQ_TOPIC) {
    return;
  }

  const { targetInstanceId } = packet.data;
  const reply = {
    type: `process:${targetInstanceId}`,
    data: {
      instanceId: Number(process.env.pm_id),
      register: prom.register.getMetricsAsJSON(),
    },
  };

  process.send(reply);
});
// Connect to pm2 when the module is loaded. The promise is not awaited by
// anything, so a failure is logged here rather than being left as an
// unhandled rejection.
pm2exec('connect').catch(err => {
  console.error(`Failed to connect to pm2: ${err.message}`);
});
| module.exports = async (req, res) => { | |
| let responceData; | |
| try { | |
| const instancesData = await pm2exec('list'); | |
| const register = | |
| getOnlineInstances(instancesData).length > 1 | |
| ? await getAggregatedRegistry(instancesData) | |
| : getCurrentRegistry(instancesData); | |
| responceData = register.metrics(); | |
| } catch (err) { | |
| console.error(`Failed to get metrics: ${err.message}`); | |
| } finally { | |
| res.set('Content-Type', prom.register.contentType); | |
| res.end(responceData); | |
| } | |
| }; |
When running `pm2 reload` on the pm2 cluster, the data gets messed up. I'm not sure why, and I don't know how to solve it.
When running `pm2 reload` on the pm2 cluster, the data gets messed up. I'm not sure why, and I don't know how to solve it.
Please check out the updated version of this gist.
Is there an example/documentation on how to add this to my nodejs express project?
@FidelisChimombe if you are trying to implement prometheus to monitor APIs (like response time and so on) we ended up using https://github.com/nginxinc/nginx-prometheus-exporter since we use Nginx as a reverse proxy for our Node.js pm2 server. So if you use Nginx too you can use that exporter, it is a stable package.
Big thanks for this gist. The API has changed since then, but the main logic still works.
Thanks for this gist; your solution also works well with
swagger-stats. We're going to include this in the docs, too!