#!/usr/bin/env -S node --max-old-space-size=4096

// Simple Node.js script that provides a jq alternative for processing JSON on the command line.
// Supports newline-separated JSON (JSON lines) as well as plain JSON data.
// Also supports log data where some lines are JSON and some aren't, and log lines where the
// beginning of the line is text and the end of the line is JSON, e.g.:
//
// 2022-10-18T14:07:53.960Z [INFO ] starting server with config: {"port":3000}
//
// USAGE:
//
// With a pipe (can break for large amounts of JSON data):
//
// cat some-data.json | json
//
// With a file path (works better for large amounts of data):
//
// json . some-data.json
//
// The JSON data is available in a `data` variable. If the JavaScript code argument
// starts with a "." then that is equivalent to starting it with "data.".
//
// You can use map and filter and useful lodash functions like get:
//
// cat some-data.json | json ".Items.map(i => ({id: get(i, 'id.S'), updatedAt: get(i, 'updatedAt.N')})).filter(i => new Date(Number(i.updatedAt)) < new Date())"
//
// You can access the JSON data with the data variable:
//
// cat some-data.json | json "Object.keys(data)"
//
// Split complex processing with pipes if it helps readability:
//
// cat some-data.json \
//   | json ".Items.map(i => ({id: get(i, 'id.S'), updatedAt: get(i, 'updatedAt.N')}))" \
//   | json ".filter(i => new Date(Number(i.updatedAt)) < new Date())"
//
// Easily print lengths of arrays, keys of objects, etc.:
//
// cat some-data.json | json ".Items.length"
//
// Pretty-print (echo) JSON data:
//
// cat some-data.json | json .
//
// Raw output (i.e. a raw string/number or newline-separated values, etc.):
//
// cat some-data.json | RAW=true json '.filter(l => l.message && l.message.includes("Request failed")).map(l => l.status).join("\n")'
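//
// JSON lines output (one JSON document per line; applies when the result is an array):
//
// cat some-data.json | JSONL=true json ".Items"
//
// Count values with the countBy helper defined below, which returns [value, count]
// pairs sorted by count descending (the 'status' field is illustrative, like the
// fields in the examples above):
//
// cat some-data.json | json "countBy(data, 'status')"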

const fs = require('fs')
const path = require('path')
const readline = require('readline')
const _ = require('lodash')
// Expose lodash functions (get, pick, etc.) as globals so the eval'd code can use them directly
Object.assign(global, require('lodash'))
// Ramda and object-diffy are not used by the script itself but are available to the eval'd code
const R = require('ramda')
const { diff } = require('object-diffy')

// https://stackoverflow.com/questions/1248302/how-to-get-the-size-of-a-javascript-object
// Not used by the script itself but available to the eval'd code
function roughSizeOfObject(object) {
  const objectList = []
  const stack = [object]
  let bytes = 0

  while (stack.length) {
    const value = stack.pop()

    if (typeof value === 'boolean') {
      bytes += 4
    } else if (typeof value === 'string') {
      bytes += value.length * 2
    } else if (typeof value === 'number') {
      bytes += 8
    } else if (typeof value === 'object' && objectList.indexOf(value) === -1) {
      objectList.push(value)
      for (const i in value) {
        stack.push(value[i])
      }
    }
  }
  return bytes
}

function readStdIn() {
  return fs.readFileSync(0).toString()
}

function parseLine(line, openLines) {
  const result = { openLines: [...openLines] }
  try {
    const openIndex = line.indexOf('{')
    if (openLines.length === 0 && openIndex >= 0 && line.endsWith('}')) {
      // Line is (or ends with) a complete JSON object
      const doc = JSON.parse(line.substring(openIndex))
      if (openIndex > 0 && !doc._line) doc._line = line.substring(0, openIndex)
      result.parsedLine = doc
    } else if (line === '{') {
      // Start of a pretty-printed JSON object spanning multiple lines
      result.openLines = [line]
    } else if (openLines.length > 0) {
      result.openLines.push(line)
      if (line === '}') {
        result.parsedLine = JSON.parse(result.openLines.join('\n'))
        result.openLines = []
      }
    } else {
      // Plain text line with no JSON in it
      result.parsedLine = { _line: line }
    }
  } catch (err) {
    result.error = `Error thrown parsing line: ${line} - ${err.stack}`
    result.openLines = []
    result.parsedLine = { _line: line }
  }
  return result
}

async function jsonIn(filePath) {
  let textInput
  try {
    if (filePath) {
      // Resolve relative paths so require can load JSON files from the current directory
      return require(path.resolve(filePath))
    } else {
      textInput = readStdIn()
      // NOTE: I've found JSON.parse intermittently errors out for data sizes around
      // 15 MB but require(filePath) can handle more?
      // const dataSizeMb = Buffer.byteLength(textInput) / 1024 / 1024
      return JSON.parse(textInput)
    }
  } catch (jsonErr) {
    // Input is not a single JSON document - fall back to parsing it line by line
    try {
      const lines = []
      let openLines = []
      let nErrors = 0
      let lineCount = 0
      const processLine = (line) => {
        lineCount += 1
        const result = parseLine(line, openLines)
        if (result.error) {
          console.error(result.error)
          nErrors += 1
        }
        if (result.parsedLine) lines.push(result.parsedLine)
        openLines = result.openLines
      }
      if (filePath) {
        const rl = readline.createInterface({
          input: fs.createReadStream(filePath),
          crlfDelay: Infinity,
        })
        for await (const line of rl) {
          processLine(line)
        }
      } else {
        textInput = textInput || readStdIn()
        const textLines = textInput.trim().split('\n').map(l => l.trim()).filter(Boolean)
        for (const line of textLines) {
          processLine(line)
        }
      }
      if (nErrors > 0) console.error(`Failed to parse ${nErrors}/${lineCount} lines due to errors`)
      return lines
    } catch (linesErr) {
      console.error(jsonErr.stack)
      console.error(linesErr.stack)
      throw new Error('Could not parse input as JSON or as JSON lines')
    }
  }
}

function printJson(data) {
  console.log(JSON.stringify(data, null, 4))
}

function printJsonLines(data) {
  for (const line of data) {
    console.log(JSON.stringify(line))
  }
}

function getCodeArg() {
  let code = process.argv[2] || 'data'
  // Support jq-like dot syntax
  if (code === '.') {
    code = 'data'
  } else if (code.startsWith('.')) {
    code = 'data' + code
  }
  return code
}

// Helper functions available to the eval'd code

// Group values by fn (a function or property path) and return [value, count]
// pairs sorted by count, descending
function countBy(data, fn) {
  return Object.entries(_.mapValues(_.groupBy(data, fn), l => l.length)).sort((a, b) => b[1] - a[1])
}

async function main() {
  const code = getCodeArg()
  const filePath = process.argv[3]
  const data = await jsonIn(filePath)
  // The eval'd code can reference `data` and any helpers/libraries in scope here
  const processedData = eval(code)
  if (process.env.RAW === 'true') {
    console.log(processedData)
  } else if (process.env.JSONL === 'true' && Array.isArray(processedData)) {
    printJsonLines(processedData)
  } else {
    printJson(processedData)
  }
}

main().catch((err) => {
  console.error(err.stack)
  process.exit(1)
})
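
// NOTE: For mixed log lines, parseLine extracts the trailing JSON and keeps the
// text prefix under `_line`, so the example log line from the header parses as
// (shape follows from the parseLine logic above):
//
// parseLine('2022-10-18T14:07:53.960Z [INFO ] starting server with config: {"port":3000}', [])
// // => { openLines: [],
// //      parsedLine: { port: 3000, _line: '2022-10-18T14:07:53.960Z [INFO ] starting server with config: ' } }
//
// Pretty-printed JSON spanning multiple lines is accumulated between a lone '{'
// line and a lone '}' line, then parsed as one document.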