Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save sgouda0412/ca4380f12f7f3065483f7ce0fc4f5d17 to your computer and use it in GitHub Desktop.
Save sgouda0412/ca4380f12f7f3065483f7ce0fc4f5d17 to your computer and use it in GitHub Desktop.

Revisions

  1. @ducchetrongminh ducchetrongminh revised this gist Jul 8, 2022. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions dbt_docsblock_autogenerator.py
    Original file line number Diff line number Diff line change
    @@ -12,6 +12,7 @@

    """
    This script will generate dbt docs block from documentation inside yml files. This allows us to reuse existing dbt documentation.
    Use official solution when this issue is fixed: https://github.com/dbt-labs/dbt-core/issues/2995.
    To generate docs block files:
    python -m path.to.dbt_docsblock_autogenerator
  2. @ducchetrongminh ducchetrongminh revised this gist Jul 8, 2022. 1 changed file with 20 additions and 11 deletions.
    31 changes: 20 additions & 11 deletions dbt_docsblock_autogenerator.py
    Original file line number Diff line number Diff line change
    @@ -13,23 +13,24 @@
    """
    This script will generate dbt docs block from documentation inside yml files. This allows us to reuse existing dbt documentation.
    To reuse the docs block:
    To generate docs block files:
    python -m path.to.dbt_docsblock_autogenerator
    It will generate these files:
    - 1st-docs-path/autogen_models_docsblock.md
    - 1st-docs-path/autogen_sources_docsblock.md
    To reuse documentation, use docs block with the following format:
    - For model:
    + autogen__model_name
    + autogen__model_name__column_name
    - For source:
    + autogen__source_name__source_table_name
    + autogen__source_name__source_table_name__column_name
    This script will generate these files:
    - 1st-docs-path/autogen_models_docsblock.md
    - 1st-docs-path/autogen_sources_docsblock.md
    Note:
    - You must set the config docs-paths.
    - It will use the first path in your config docs-paths.
    - It will use the first path in your config docs-paths or model-paths.
    - You can change the docsblock ID prefix (in this case 'autogen'). You can set to None to exclude prefix.
    - You can change the docsblock template.
    """


    @@ -52,10 +53,18 @@ def get_dbt_config() -> dict:
    return dbt_config


    def get_all_docs_paths(dbt_config: dict) -> list:
        """Return the de-duplicated docs search paths in configuration order.

        Paths listed under ``docs-paths`` come first, followed by the paths
        listed under ``model-paths``. A key that is missing or set to null
        contributes no paths.

        Args:
            dbt_config: Parsed content of ``dbt_project.yml``.

        Returns:
            A list of unique paths; the first element is the first configured
            path, so callers indexing ``[0]`` get a stable result.
        """
        configured_paths = (
            (dbt_config.get('docs-paths') or [])
            + (dbt_config.get('model-paths') or [])
        )
        # dict.fromkeys de-duplicates while keeping first-seen order; a plain
        # set() would make "the first path" arbitrary from run to run.
        return list(dict.fromkeys(configured_paths))


    def yield_all_yml_files(dbt_config: dict):
    dbt_all_docs_paths = set(dbt_config.get('model-paths') + dbt_config.get('docs-paths')) # use set to deduplicate
    all_docs_paths = get_all_docs_paths(dbt_config)

    for docs_path in dbt_all_docs_paths:
    for docs_path in all_docs_paths:
    for root, dirs, files in os.walk(docs_path):
    for file in files:
    if file.endswith(".yml"):
    @@ -144,7 +153,7 @@ def render_source_column_docsblock(column: dict, source_name: str, source_table_

    def generate_docsblock():
    dbt_config = get_dbt_config()
    dbt_docs_path = dbt_config['docs-paths'][0]
    dbt_docs_path = get_all_docs_paths(dbt_config)[0]

    models_docsblock_path = os.path.join(dbt_docs_path, 'autogen_models_docsblock.md')
    models_docsblock_file = open(models_docsblock_path, 'w')
  3. @ducchetrongminh ducchetrongminh created this gist Jul 8, 2022.
    185 changes: 185 additions & 0 deletions dbt_docsblock_autogenerator.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,185 @@
    # Standard imports
    import os


    # Library imports
    import yaml



    # Local imports


    """
    This script generates dbt docs blocks from the documentation already written inside yml files, so that existing dbt documentation can be reused.
    To reuse the docs block:
    - For model:
    + autogen__model_name
    + autogen__model_name__column_name
    - For source:
    + autogen__source_name__source_table_name
    + autogen__source_name__source_table_name__column_name
    This script will generate these files:
    - 1st-docs-path/autogen_models_docsblock.md
    - 1st-docs-path/autogen_sources_docsblock.md
    Note:
    - You must set the config docs-paths.
    - It will use the first path in your config docs-paths.
    - You can change the docsblock ID prefix (in this case 'autogen'). You can set to None to exclude prefix.
    - You can change the docsblock template.
    """



    # Prefix placed in front of every generated docs block ID.
    DOCSBLOCK_ID_PREFIX = 'autogen'
    # Skeleton of one generated docs block, filled in with str.format():
    # doubled braces render as literal "{" / "}", the single-brace names are
    # the placeholders. It is a raw string because the "\>" sequences are
    # literal Markdown escapes, and "\>" is not a valid Python escape sequence
    # in a normal string literal.
    DOCSBLOCK_TEMPLATE = r'''
    {{% docs {docsblock_id} %}}
    **\>\>\> INHERITED FROM {source} <<<**
    {docsblock_content}
    {{% enddocs %}}
    '''



    def get_dbt_config() -> dict:
        """Load ``dbt_project.yml`` from the current working directory.

        Returns:
            The parsed project configuration. An empty file parses to None,
            which is returned as an empty dict so the result honours the
            ``dict`` return annotation.

        Raises:
            FileNotFoundError: If ``dbt_project.yml`` is not in the current
                working directory.
        """
        # Explicit encoding: the default otherwise depends on the locale.
        with open('dbt_project.yml', encoding='utf-8') as f:
            dbt_config = yaml.safe_load(f)
        return dbt_config or {}


    def yield_all_yml_files(dbt_config: dict):
        """Yield the parsed content of every ``.yml`` file under the dbt paths.

        The directories listed under ``model-paths`` and ``docs-paths`` are
        walked recursively. A key that is missing or null contributes no
        directories, and a directory that does not exist is skipped.

        Args:
            dbt_config: Parsed content of ``dbt_project.yml``.

        Yields:
            One dict per ``.yml`` file whose top level is a mapping. Files
            that parse to anything else (for example an empty file, which
            parses to None) are skipped.
        """
        configured_paths = (
            (dbt_config.get('model-paths') or [])
            + (dbt_config.get('docs-paths') or [])
        )
        # Overlapping or nested search paths would otherwise visit the same
        # file twice and produce duplicate docs blocks.
        visited_files = set()

        # dict.fromkeys de-duplicates the paths while keeping their order.
        for docs_path in dict.fromkeys(configured_paths):
            for root, _dirs, files in os.walk(docs_path):
                for file_name in files:
                    if not file_name.endswith('.yml'):
                        continue

                    file_path = os.path.join(root, file_name)
                    real_path = os.path.realpath(file_path)
                    if real_path in visited_files:
                        continue
                    visited_files.add(real_path)

                    # Close the file before yielding so it is not held open
                    # while the consumer processes the data.
                    with open(file_path, encoding='utf-8') as f:
                        file_data = yaml.safe_load(f)

                    if isinstance(file_data, dict):
                        yield file_data


    def render_model_docsblock(model: dict) -> str:
        """Render the docs block that carries a model's own description.

        Args:
            model: One entry of a ``models`` list from a dbt yml file. Its
                ``name`` is used for the block ID and the "inherited from"
                line; its ``description`` becomes the block content.

        Returns:
            ``DOCSBLOCK_TEMPLATE`` filled in for this model. The content is
            ``'-'`` when the model has no description.
        """
        model_name = model.get('name')

        # The prefix is optional: with DOCSBLOCK_ID_PREFIX set to None the ID
        # is built from the name alone instead of failing inside join().
        id_parts = [model_name]
        if DOCSBLOCK_ID_PREFIX is not None:
            id_parts.insert(0, DOCSBLOCK_ID_PREFIX)
        docsblock_id = '__'.join(id_parts)

        description = model.get('description', '-')
        if description is None:
            # A bare "description:" key parses to None rather than being absent.
            description = '-'
        # avoid jinja inside docs block
        docsblock_content = description.replace('{{', '').replace('}}', '')

        return DOCSBLOCK_TEMPLATE.format(
            docsblock_id=docsblock_id,
            source=model_name,
            docsblock_content=docsblock_content,
        )


    def render_model_column_docsblock(column: dict, model_name: str) -> str:
        """Render the docs block that carries a model column's description.

        Args:
            column: One entry of a model's ``columns`` list. Its ``name`` is
                used for the block ID and the "inherited from" line; its
                ``description`` becomes the block content.
            model_name: Name of the model the column belongs to.

        Returns:
            ``DOCSBLOCK_TEMPLATE`` filled in for this column. The content is
            ``'-'`` when the column has no description.
        """
        column_name = column.get('name')

        # The prefix is optional: with DOCSBLOCK_ID_PREFIX set to None the ID
        # is built from the names alone instead of failing inside join().
        id_parts = [model_name, column_name]
        if DOCSBLOCK_ID_PREFIX is not None:
            id_parts.insert(0, DOCSBLOCK_ID_PREFIX)
        docsblock_id = '__'.join(id_parts)

        source = '.'.join([model_name, column_name])

        description = column.get('description', '-')
        if description is None:
            # A bare "description:" key parses to None rather than being absent.
            description = '-'
        # avoid jinja inside docs block
        docsblock_content = description.replace('{{', '').replace('}}', '')

        return DOCSBLOCK_TEMPLATE.format(
            docsblock_id=docsblock_id,
            source=source,
            docsblock_content=docsblock_content,
        )


    def render_source_table_docsblock(source_table: dict, source_name: str) -> str:
        """Render the docs block that carries a source table's description.

        Args:
            source_table: One entry of a source's ``tables`` list. Its ``name``
                is used for the block ID and the "inherited from" line; its
                ``description`` becomes the block content.
            source_name: Name of the source the table belongs to.

        Returns:
            ``DOCSBLOCK_TEMPLATE`` filled in for this table. The content is
            ``'-'`` when the table has no description.
        """
        table_name = source_table.get('name')

        # The prefix is optional: with DOCSBLOCK_ID_PREFIX set to None the ID
        # is built from the names alone instead of failing inside join().
        id_parts = [source_name, table_name]
        if DOCSBLOCK_ID_PREFIX is not None:
            id_parts.insert(0, DOCSBLOCK_ID_PREFIX)
        docsblock_id = '__'.join(id_parts)

        source = '.'.join([source_name, table_name])

        description = source_table.get('description', '-')
        if description is None:
            # A bare "description:" key parses to None rather than being absent.
            description = '-'
        # avoid jinja inside docs block
        docsblock_content = description.replace('{{', '').replace('}}', '')

        return DOCSBLOCK_TEMPLATE.format(
            docsblock_id=docsblock_id,
            source=source,
            docsblock_content=docsblock_content,
        )


    def render_source_column_docsblock(column: dict, source_name: str, source_table_name: str) -> str:
        """Render the docs block that carries a source column's description.

        Args:
            column: One entry of a source table's ``columns`` list. Its
                ``name`` is used for the block ID and the "inherited from"
                line; its ``description`` becomes the block content.
            source_name: Name of the source the table belongs to.
            source_table_name: Name of the table the column belongs to.

        Returns:
            ``DOCSBLOCK_TEMPLATE`` filled in for this column. The content is
            ``'-'`` when the column has no description.
        """
        column_name = column.get('name')

        # The prefix is optional: with DOCSBLOCK_ID_PREFIX set to None the ID
        # is built from the names alone instead of failing inside join().
        id_parts = [source_name, source_table_name, column_name]
        if DOCSBLOCK_ID_PREFIX is not None:
            id_parts.insert(0, DOCSBLOCK_ID_PREFIX)
        docsblock_id = '__'.join(id_parts)

        source = '.'.join([source_name, source_table_name, column_name])

        description = column.get('description', '-')
        if description is None:
            # A bare "description:" key parses to None rather than being absent.
            description = '-'
        # avoid jinja inside docs block
        docsblock_content = description.replace('{{', '').replace('}}', '')

        return DOCSBLOCK_TEMPLATE.format(
            docsblock_id=docsblock_id,
            source=source,
            docsblock_content=docsblock_content,
        )


    def generate_docsblock():
        """Write the autogenerated model and source docs block files.

        Reads the dbt project config, then writes two files into the first
        configured docs path (falling back to the first model path when
        ``docs-paths`` is not set):

        - ``autogen_models_docsblock.md``: one block per model and per model
          column.
        - ``autogen_sources_docsblock.md``: one block per source table and per
          source table column.

        Both files are overwritten on every run.

        Raises:
            KeyError: If neither ``docs-paths`` nor ``model-paths`` is set in
                the project config.
        """
        dbt_config = get_dbt_config()

        docs_paths = dbt_config.get('docs-paths') or dbt_config.get('model-paths')
        if not docs_paths:
            raise KeyError(
                "Set 'docs-paths' or 'model-paths' in dbt_project.yml to choose "
                "where the docs block files are written"
            )
        dbt_docs_path = docs_paths[0]

        models_docsblock_path = os.path.join(dbt_docs_path, 'autogen_models_docsblock.md')
        sources_docsblock_path = os.path.join(dbt_docs_path, 'autogen_sources_docsblock.md')

        # The with-block closes both files even when rendering fails part-way.
        with open(models_docsblock_path, 'w', encoding='utf-8') as models_docsblock_file, \
                open(sources_docsblock_path, 'w', encoding='utf-8') as sources_docsblock_file:
            for data in yield_all_yml_files(dbt_config):
                # An empty yml file parses to None; there is nothing to render.
                if not isinstance(data, dict):
                    continue

                # "or []" also covers keys that are present but null.
                # render for models
                for model in data.get('models') or []:
                    models_docsblock_file.write(render_model_docsblock(model))

                    for column in model.get('columns') or []:
                        models_docsblock_file.write(render_model_column_docsblock(
                            column=column,
                            model_name=model.get('name'),
                        ))

                # render for sources
                for source in data.get('sources') or []:
                    for source_table in source.get('tables') or []:
                        sources_docsblock_file.write(render_source_table_docsblock(
                            source_table=source_table,
                            source_name=source.get('name'),
                        ))

                        for column in source_table.get('columns') or []:
                            sources_docsblock_file.write(render_source_column_docsblock(
                                column=column,
                                source_name=source.get('name'),
                                source_table_name=source_table.get('name'),
                            ))


    # Script entry point: generate the docs block files only when this module
    # is executed directly (e.g. with ``python -m``), not when it is imported.
    if __name__ == '__main__':
        generate_docsblock()