"""Generate Singer SDK ``th.PropertiesList`` source code from a JSON schema.

Usage: ``python <this script> INPUT_SCHEMA.json OUTPUT.py``

The input file must be a JSON object with a top-level ``"properties"``
mapping; the output file receives Python source text that builds the
equivalent schema using ``singer_sdk.typing`` helpers.
"""
import json
import pathlib
import sys
from typing import List

# One level of indentation in the generated source.
INDENTATION = " " * 4

# JSON-schema type name -> source text of the type helper to emit.
# "null" (also used when no type is given) is emitted as a string type.
TYPE_DICT = {
    "string": "th.StringType",
    "integer": "th.IntegerType",
    "number": "th.NumberType",
    "array": "th.ArrayType",
    "object": "th.ObjectType",
    "null": "th.StringType",
    "boolean": "th.BooleanType",
}


def parse_attribute(key, value, depth=0) -> str:
    """Render one schema property as ``th.Property(...)`` source text.

    Dispatches on the property's resolved type to the array, object or
    plain renderer. Each rendered attribute starts with a newline.

    Args:
        key: Property name.
        value: JSON-schema fragment (a dict) describing the property.
        depth: Indentation level of the emitted line.

    Returns:
        The generated source text for this property.
    """
    # Progress output on stdout, kept as-is.
    print(f"Parsing {key}: {value}")
    datatype = extract_datatype(value.get("type", "null"))
    print(f"..Identified as {datatype}")
    if datatype == "array":
        return print_array_attribute(key, value, depth)
    if datatype == "object":
        return print_object_attribute(key, value, depth)
    return print_normal_attribute(key, value, depth)


def print_normal_attribute(key, value, depth=0) -> str:
    """Render a property whose type maps directly to a ``TYPE_DICT`` entry.

    Raises:
        KeyError: If the resolved type name is not in ``TYPE_DICT``.
    """
    raw_type = extract_datatype(value.get("type", "null"))
    datatype = TYPE_DICT[raw_type]
    prefix = "\n" + depth * INDENTATION
    return f'{prefix}th.Property("{key}", {datatype}),'


def print_array_attribute(key, value, depth=0) -> str:
    """Render an array property as ``th.Property(key, th.ArrayType(...))``.

    The wrapped type comes from ``value["items"]``: object items with
    properties are expanded recursively one level deeper, object items
    without properties become an empty ``th.ObjectType()``, and any other
    item type is looked up in ``TYPE_DICT`` (missing item type -> "null").

    Raises:
        KeyError: If the item type name is not in ``TYPE_DICT``.
    """
    datatype = TYPE_DICT["array"]
    items_schema = value.get("items", {})
    item_type = extract_datatype(items_schema.get("type", "null"))
    prefix = "\n" + depth * INDENTATION
    opening = f'{prefix}th.Property("{key}", {datatype}('

    if item_type != "object":
        return f"{opening}{TYPE_DICT[item_type]})),"

    if items_schema.get("properties"):
        # The anonymous object renderer ends with ")),", which closes both
        # the ObjectType and this ArrayType; only th.Property is left open.
        nested = print_object_attribute(None, items_schema, depth + 1)
        return f"{opening}{nested}{prefix}),"

    # Object items without properties: emit an empty ObjectType so the
    # generated call stays balanced and ArrayType still wraps a type.
    return f"{opening}{TYPE_DICT['object']}())),"


def print_object_attribute(key, value, depth=0) -> str:
    """Render an object schema as ``th.ObjectType(...)`` source text.

    With a ``key`` the result is a complete
    ``th.Property(key, th.ObjectType(...)),`` entry. With ``key=None`` the
    result is an anonymous ``th.ObjectType(...`` whose trailing ``)),`` also
    closes the caller's enclosing call (see ``print_array_attribute``).

    Args:
        key: Property name, or None for an anonymous object.
        value: JSON-schema fragment; its ``"properties"`` are rendered
            one indentation level deeper.
        depth: Indentation level of the opening line.
    """
    datatype = TYPE_DICT["object"]
    prefix = "\n" + depth * INDENTATION
    if key is not None:
        opening = f'{prefix}th.Property("{key}", {datatype}('
    else:
        opening = f"{prefix}{datatype}("

    properties = value.get("properties", {})
    if not properties:
        return opening + ")),"
    body = parse_object_attributes(properties, depth + 1)
    return opening + body + prefix + ")),"


def extract_datatype(value):
    """Resolve a JSON-schema ``type`` value to a single type name.

    A list such as ``["null", "string"]`` resolves to its first non-"null"
    entry; a list with no such entry resolves to ``"null"``. Any non-list
    value is returned unchanged.
    """
    if isinstance(value, list):
        return next((name for name in value if name != "null"), "null")
    return value


def parse_object_attributes(properties, depth=0) -> str:
    """Render every property of a ``properties`` mapping, concatenated.

    Returns:
        The rendered attributes in mapping order, or the literal ``"()"``
        when the mapping is empty.
    """
    if not properties:
        return "()"
    return "".join(
        parse_attribute(name, schema, depth)
        for name, schema in properties.items()
    )


def parse_json_schema(stream_schema: dict) -> str:
    """Build the full generated module text for a properties mapping.

    Args:
        stream_schema: Mapping of property name to JSON-schema fragment
            (the ``"properties"`` of the stream schema).

    Returns:
        Python source text that imports ``singer_sdk.typing`` as ``th``
        and assigns ``schema = th.PropertiesList(...).to_dict()``.
    """
    template = (
        "from singer_sdk import typing as th\n"
        "\n"
        "schema = th.PropertiesList(\nCONTENTS\n).to_dict()\n"
    )
    attributes = parse_object_attributes(stream_schema, depth=1)
    # Each attribute starts with "\n"; strip the outer ones because the
    # template already supplies the line breaks around CONTENTS.
    return template.replace("CONTENTS", attributes.strip("\n"))


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} INPUT_SCHEMA.json OUTPUT.py")
    input_fname = sys.argv[1]
    output_fname = sys.argv[2]

    txt = pathlib.Path(input_fname).read_text()
    catalog = json.loads(txt)
    entrypoint = catalog["properties"]
    attributes = parse_json_schema(entrypoint)
    pathlib.Path(output_fname).write_text(attributes)