Skip to content

Instantly share code, notes, and snippets.

@dlamblin
Last active July 2, 2025 21:54
Show Gist options
  • Save dlamblin/2e83a001fd921f33e92ac41614471d26 to your computer and use it in GitHub Desktop.
Limited Terraform HCL .tf conversion to JSON .json output.
#!/usr/bin/env python
from argparse import ArgumentParser
from fileinput import input
from itertools import groupby
from json import dumps
from typing import Generator
"""
The intent of this script is to convert the many versions.tf files found
with `find . -name versions.tf` (finds 124 files in my repo) into json, so
that we can use `jq -r '.terraform.required_providers|to_entries[]|
.value.version + " " + .value.source + " " + .key'` to get all the versions
One per line like:
~> 5.90.0 hashicorp/aws aws
We'll then be able to `sort -v`
If the terraform repo is in your CDPATH, hcl2json.py is in your PATH,
as are column, find, jq, sort & xargs, all put together, it would be:
cd terraform > /dev/null; \
find . -name versions.tf -print0 |\
xargs -0 hcl2json.py |\
jq -r '.terraform.required_providers|to_entries[]|
.value.version + " " + .value.source + " " + .key' |\
sort -Vrk4 -k2 | sort -uk4 | column -t; cd - > /dev/null
To get the highest version number for each required provider named.
E.G. the above produces output like:
~> 2.7.1 hashicorp/archive archive
~> 5.99.1 hashicorp/aws aws
= 4.20.0 cloudflare/cloudflare cloudflare
~> 3.65.0 datadog/datadog datadog
~> 3.4.3 hashicorp/dns dns
~> 6.4.0 integrations/github github
~> 2.16.1 hashicorp/helm helm
~> 2.37.0 hashicorp/kubernetes kubernetes
~> 2.4.0 hashicorp/local local
~> 3.2.4 hashicorp/null null
~> 1.25.0 cyrilgdn/postgresql postgresql
~> 3.7.1 hashicorp/random random
~> 0.13.1 hashicorp/time time
~> 4.1.0 hashicorp/tls tls
~> 4.8.0 hashicorp/vault vault
Known to "work" with Python 3.13.2
There's no error handling on the parsing, test on smaller inputs first.
"""
def main() -> None:
    """Might convert HCL to JSON.

    If HCL is valid and simple like versions.tf.  Parses the known
    options, then treats every unrecognized argument as an input file
    name for convert_input_with().
    """
    parser = ArgumentParser(
        prog='hcl2json',
        description='Loosely converts some Hashicorp Terraform HCL to JSON '
                    'without error handling',
        epilog='copyright 2025 Daniel Lamblin MIT Licensed',
    )
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')
    # nargs='?' with const=4 means: bare -i indents 4, -i N indents N,
    # and omitting -i entirely (default=None) emits one-line JSON.
    parser.add_argument('-i', '--indent', nargs='?', type=int,
                        const=4, default=None,
                        help='indent INDENT spaces. Default 4, or one-line')
    parser.add_argument('-s', '--sort-keys', action='store_true',
                        help='sort the keys in the JSON objects')
    # parse_known_args(): anything that is not a recognized option is
    # collected into file_names and treated as an input file.
    arg, file_names = parser.parse_known_args()
    # vars() is the idiomatic way to turn a Namespace into kwargs
    # (rather than poking at arg.__dict__ directly).
    convert_input_with(file_names, **vars(arg))
def convert_input_with(
        file_names: list[str],
        indent: int | None = None,
        sort_keys: bool = False,
) -> None:
    """Convert each named HCL file to JSON and print one document per file.

    The (filename, line) pairs from input_lines_by_filename() arrive in
    file order, so groupby() on the filename yields one run of lines per
    file; each run is joined back into text, parsed, and dumped.
    """
    per_file_runs = groupby(
        input_lines_by_filename(file_names),
        key=lambda pair: pair[0],
    )
    for _, run in per_file_runs:
        content = ''.join(text for _, text in run)
        print(dumps(obj=process(content), indent=indent, sort_keys=sort_keys))
def input_lines_by_filename(
file_names: list[str],
) -> Generator[tuple[str, str]]:
with input(file_names) as all_input:
for line in all_input:
yield all_input.filename(), line
def process(content: str) -> dict:
    """
    HCL to dict
    E.G.
    >>> process('''
    ... terraform {
    ...   required_providers {
    ...     aws = {
    ...       source = "hashicorp/aws"
    ...       version = "~> 5.90.0"
    ...     }
    ...   }
    ...   required_version = "~> 1.4.7"
    ... }''')
    {'terraform': {'required_providers': {'aws': {'source': 'hashicorp/aws', 'version': '~> 5.90.0'}}, 'required_version': '~> 1.4.7'}}
    """
    tokens = tokenize(content)
    # 'result', not 'dict': the original shadowed the builtin type name.
    result, _ = build_from(tokens)
    return result
def tokenize(content: str) -> list[str]:
    """Split content into tokens for the parser.

    Splitting on '"' alternates unquoted and quoted segments: every
    odd-indexed segment sat between double quotes and becomes a single
    token with the quotes stripped.  Even-indexed segments are split on
    whitespace, with each line break kept as an explicit newline token
    (so the parser can detect end-of-statement); the trailing newline
    marker of each unquoted segment is dropped.
    """
    tokens: list[str] = []
    for position, segment in enumerate(content.split('"')):
        if position % 2 == 1:
            # Quoted span: keep verbatim as one token.
            tokens.append(segment)
            continue
        # Unquoted span: words plus a newline marker per source line.
        words: list[str] = []
        for line in segment.splitlines():
            words.extend(line.split())
            words.append('\n')
        # Drop the marker for the segment's final (possibly partial) line.
        tokens.extend(words[:-1])
    return tokens
def build_from(tokens: list[str], idx: int = 0) -> tuple[dict, int]:
    """Build a JSON-ready dict from tokens, starting at index idx.

    Returns the dict plus the index just past the tokens consumed
    (i.e. past the matching '}' when called recursively for a block).
    Consecutive bare words before '{' or '=' are joined into one key
    with '__'.  HCL list syntax ('[' ... ']') is not supported.
    """
    # Renamed from 'dict' so the builtin type is not shadowed.
    result: dict = {}
    key = None
    while idx < len(tokens):
        token = tokens[idx]
        idx += 1
        if token == '{':
            # Nested block: recurse, then resume past its closing '}'.
            result[key], idx = build_from(tokens, idx)
            key = None
        elif token == '}':
            return result, idx
        elif token == '=':
            result[key], idx = scan_for_value(tokens, idx)
            key = None
        elif token.startswith('#'):
            # Comment: skip the rest of the line.
            # TODO(lamblin): fix for a quoted token that starts with # ?
            while idx < len(tokens) and tokens[idx] != '\n':
                idx += 1
        elif token == '\n':
            continue
        else:
            key = token if not key else key + '__' + token
    return result, idx
def scan_for_value(tokens: list[str], idx: int) -> tuple[str | dict, int]:
"""
Scan for a value in tokens up to newline, closing brace, etc
"""
value = ''
depth = 0
while idx < len(tokens):
token = tokens[idx]
idx += 1
if token == '{' and not value:
return build_from(tokens, idx)
elif depth == 0 and token in ('}', '\n'):
return value, idx
elif token == '{':
value += f'-{depth}-'
depth += 1
elif token == '}':
depth -= 1
value += f'-{depth}-'
else:
value += f'__{token}' if value else token
return value, idx
def old_process_lines_by_filename() -> None:
    """This worked but I like groupby(input_lines_by_filename(… better.

    Reads stdin via fileinput, buffering lines until the reported
    filename changes, then parses and prints each finished file.
    """
    active_name: str = ''
    buffered: str = ''
    with input() as stream:
        for text in stream:
            if active_name != stream.filename():
                # Crossed a file boundary: emit the finished file first.
                if buffered:
                    print(dumps(process(buffered)))
                    buffered = ''
                active_name = stream.filename()
            buffered += text
    if buffered:
        # Flush the last file, which has no boundary after it.
        print(dumps(process(buffered)))
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment