Last active
July 2, 2025 21:54
-
-
Save dlamblin/2e83a001fd921f33e92ac41614471d26 to your computer and use it in GitHub Desktop.
Limited conversion of Terraform HCL .tf files to JSON .json output.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| from argparse import ArgumentParser | |
| from fileinput import input | |
| from itertools import groupby | |
| from json import dumps | |
| from typing import Generator | |
| """ | |
| The intent of this script is to convert the many versions.tf files found | |
| with `find . -name versions.tf` (finds 124 files in my repo) into json, so | |
| that we can use `jq -r '.terraform.required_providers|to_entries[]| | |
| .value.version + " " + .value.source + " " + .key'` to get all the versions | |
| One per line like: | |
| ~> 5.90.0 hashicorp/aws aws | |
| We'll then be able to `sort -v` | |
| If the terraform repo is in your CDPATH, hcl2json.py is in your PATH, | |
| as are column, find, jq, sort & xargs, all put together, it would be: | |
| cd terraform > /dev/null; \ | |
| find . -name versions.tf -print0 |\ | |
| xargs -0 hcl2json.py |\ | |
| jq -r '.terraform.required_providers|to_entries[]| | |
| .value.version + " " + .value.source + " " + .key' |\ | |
| sort -Vrk4 -k2 | sort -uk4 | column -t; cd - > /dev/null | |
| To get the highest version number for each required provider named. | |
| E.G. the above produces output like: | |
| ~> 2.7.1 hashicorp/archive archive | |
| ~> 5.99.1 hashicorp/aws aws | |
| = 4.20.0 cloudflare/cloudflare cloudflare | |
| ~> 3.65.0 datadog/datadog datadog | |
| ~> 3.4.3 hashicorp/dns dns | |
| ~> 6.4.0 integrations/github github | |
| ~> 2.16.1 hashicorp/helm helm | |
| ~> 2.37.0 hashicorp/kubernetes kubernetes | |
| ~> 2.4.0 hashicorp/local local | |
| ~> 3.2.4 hashicorp/null null | |
| ~> 1.25.0 cyrilgdn/postgresql postgresql | |
| ~> 3.7.1 hashicorp/random random | |
| ~> 0.13.1 hashicorp/time time | |
| ~> 4.1.0 hashicorp/tls tls | |
| ~> 4.8.0 hashicorp/vault vault | |
| Known to "work" with Python 3.13.2 | |
| There's no error handling on the parsing, test on smaller inputs first. | |
| """ | |
def main() -> None:
    """
    Parse CLI options and convert the named HCL files (or stdin) to JSON.

    Only handles valid, simple HCL such as a typical versions.tf;
    there is no error handling on the parse.
    """
    parser = ArgumentParser(
        prog='hcl2json',
        description='Loosely converts some Hashicorp Terraform HCL to JSON '
                    'without error handling',
        epilog='copyright 2025 Daniel Lamblin MIT Licensed',
    )
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')
    parser.add_argument('-i', '--indent', nargs='?', type=int,
                        const=4, default=None,
                        help='indent INDENT spaces. Default 4, or one-line')
    parser.add_argument('-s', '--sort-keys', action='store_true',
                        help='sort the keys in the JSON objects')
    # parse_known_args: every argument that is not a recognized option is
    # collected into file_names and handed to fileinput as an input file.
    args, file_names = parser.parse_known_args()
    # vars(args) is the documented way to view a Namespace as a dict
    # (the original used args.__dict__ and kept a commented-out call to
    # the legacy old_process_lines_by_filename, removed here).
    convert_input_with(file_names, **vars(args))
def convert_input_with(
    file_names: list[str],
    indent: int | None = None,
    sort_keys: bool = False,
) -> None:
    """
    Convert each named file (or stdin when the list is empty) from HCL to
    JSON and print one JSON document per input file.

    :param file_names: paths handed to fileinput; [] means read stdin
    :param indent: JSON indentation width, or None for one-line output
    :param sort_keys: sort object keys in the emitted JSON
    """
    line_stream = input_lines_by_filename(file_names)
    # Lines arrive tagged with their filename; group consecutive lines of
    # the same file back into one document.
    for _, tagged_lines in groupby(line_stream, key=lambda pair: pair[0]):
        document = ''.join(text for _, text in tagged_lines)
        rendered = dumps(
            obj=process(document),
            indent=indent,
            sort_keys=sort_keys,
        )
        print(rendered)
def input_lines_by_filename(
    file_names: list[str],
) -> Generator[tuple[str, str]]:
    """
    Yield (filename, line) pairs for every line across the given files.

    An empty file_names list makes fileinput fall back to stdin.
    """
    with input(file_names) as stream:
        # Tag each line with the file it came from so callers can group
        # lines back into per-file documents.
        yield from ((stream.filename(), text) for text in stream)
def process(content: str) -> dict:
    """
    Convert a string of simple HCL into a nested dict.

    E.G.
    >>> process('''
    ... terraform {
    ...   required_providers {
    ...     aws = {
    ...       source  = "hashicorp/aws"
    ...       version = "~> 5.90.0"
    ...     }
    ...   }
    ...   required_version = "~> 1.4.7"
    ... }''')
    {'terraform': {'required_providers': {'aws': {'source': 'hashicorp/aws', 'version': '~> 5.90.0'}}, 'required_version': '~> 1.4.7'}}
    """
    tokens = tokenize(content)
    # Renamed the result variable: the original bound it to a local named
    # `dict`, shadowing the builtin.
    result, _ = build_from(tokens)
    return result
def tokenize(content: str) -> list[str]:
    """
    Split HCL text into a flat list of tokens.

    The text is first split on double quotes, so every odd-numbered
    segment is the inside of a quoted string and is kept as one token
    (quotes dropped, embedded newlines preserved). Each unquoted segment
    is split on whitespace, with a '\\n' token emitted at every line
    break inside the segment but not after its final line.
    """
    tokens: list[str] = []
    for position, segment in enumerate(content.split('"')):
        if position % 2 == 1:
            # Inside quotes: the whole segment is a single token.
            tokens.append(segment)
            continue
        pieces: list[str] = []
        for line in segment.splitlines():
            pieces.extend(line.split())
            pieces.append('\n')
        # Drop this segment's trailing newline marker.
        tokens.extend(pieces[:-1])
    return tokens
def build_from(tokens: list[str], idx: int = 0) -> tuple[dict, int]:
    """
    Build a nested dict from `tokens`, starting at position `idx`.

    Returns the dict plus the index of the first unconsumed token
    (the token after the matching '}' when called recursively).
    """
    # Renamed from `dict`, which shadowed the builtin in the original.
    result: dict = {}
    key = None
    while idx < len(tokens):
        token = tokens[idx]
        idx += 1
        if token == '{':
            # Nested block: recurse; the callee consumes the matching '}'.
            result[key], idx = build_from(tokens, idx)
            key = None
        elif token == '}':
            return result, idx
        elif token == '=':
            result[key], idx = scan_for_value(tokens, idx)
            key = None
        elif token.startswith('#'):
            # Skip a comment through end-of-line.
            # TODO(lamblin): fix for a quoted token that starts with # ?
            while idx < len(tokens) and tokens[idx] != '\n':
                idx += 1
        # TODO(lamblin): HCL lists ('[', ']', ',') are not handled yet.
        elif token == '\n':
            continue
        else:
            # Multi-word labels (e.g. block type plus name) join with '__'.
            key = token if not key else key + '__' + token
    return result, idx
def scan_for_value(tokens: list[str], idx: int) -> tuple[str | dict, int]:
    """
    Scan tokens for the value after an '=' sign.

    A leading '{' delegates to build_from and yields a nested dict.
    Otherwise tokens are joined with '__' until a newline or an
    unmatched '}' at depth zero ends the value; interior braces are
    encoded as '-<depth>-' markers so nesting survives in the string.
    """
    accumulated = ''
    brace_depth = 0
    while idx < len(tokens):
        current = tokens[idx]
        idx += 1
        if current == '{' and not accumulated:
            # The value itself is an object: build it recursively.
            return build_from(tokens, idx)
        if brace_depth == 0 and current in ('}', '\n'):
            return accumulated, idx
        if current == '{':
            accumulated += f'-{brace_depth}-'
            brace_depth += 1
        elif current == '}':
            brace_depth -= 1
            accumulated += f'-{brace_depth}-'
        elif accumulated:
            accumulated += f'__{current}'
        else:
            accumulated = current
    return accumulated, idx
def old_process_lines_by_filename() -> None:
    """Superseded by groupby(input_lines_by_filename(...)); kept for reference."""
    active_name: str = ''
    buffered: str = ''
    with input() as stream:
        for text in stream:
            if active_name != stream.filename():
                # A new file has started: emit the previous file's JSON.
                if buffered:
                    print(dumps(process(buffered)))
                buffered = ''
                active_name = stream.filename()
            buffered += text
    # Emit the final (or only) file's JSON.
    if buffered:
        print(dumps(process(buffered)))
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment