- 
      
 - 
        
Save mahmoud/db02d16ac89fa401b968 to your computer and use it in GitHub Desktop.  
| """ | |
| This is an extension of the technique first detailed here: | |
| http://sedimental.org/remap.html#add_common_keys | |
| In short, it calls remap on each container, back to front, using the accumulating | |
| previous values as the default for the current iteration. | |
| """ | |
| from boltons.iterutils import remap, get_path, default_enter, default_visit | |
# Baseline configuration layer; the other layers below are merged on top of it.
defaults = {
    'host': '127.0.0.1',
    'port': 8000,
    'endpoints': {
        'persistence': {'host': '127.0.0.1', 'port': 8888},
        'cache': {'host': '127.0.0.1', 'port': 8889},
    },
    'owners': {'secondary': ['alice']},
    'zones': [{'a': 1}],
    'notes': ['this is the default'],
}

# Second layer: overrides some scalars, adds keys, and carries lists that
# overlap with the ones in ``defaults``.
overlay = {
    'host': '127.0.0.1',
    'port': 8080,
    'endpoints': {
        'persistence': {'host': '10.2.2.2', 'port': 5433},
    },
    'overlay_version': '5.0',
    'owners': {'primary': ['bob'], 'secondary': ['charles']},
    'zones': [{'a': 2}],
    'notes': ['this is the overlay'],
}

# Third layer: touches a single deeply nested leaf only.
cache_host_override = {'endpoints': {'cache': {'host': '127.0.0.2'}}}
def remerge(target_list, sourced=False):
    """Merge a list of containers, later ones taking precedence.

    Takes a list of containers (e.g., dicts) and merges them using
    boltons.iterutils.remap. Containers later in the list take
    precedence (last-wins). Lists are additive: the items of a later
    list are appended to the list already merged at the same path.

    A value already merged at a path is reused as the merge target only
    when its type is compatible with the incoming container. Otherwise
    (e.g. an earlier ``None`` where a later container holds a dict) the
    incoming container replaces it.

    By default, returns a new, merged top-level container. With the
    *sourced* option, `remerge` expects a list of (*name*, *container*)
    pairs, and returns ``(merged, source_map)``, where the source map is
    a dictionary mapping each path to the name of the container that
    last provided it.
    """
    if not sourced:
        # Give every container a throwaway name so one loop serves both modes.
        target_list = [(id(t), t) for t in target_list]

    ret = None
    source_map = {}

    def remerge_enter(path, key, value):
        fresh_parent, new_items = default_enter(path, key, value)
        new_parent = fresh_parent
        if ret and not path and key is None:
            # Root of a later container: keep filling the accumulated result.
            new_parent = ret
        try:
            cur_val = get_path(ret, path + (key,))
        except KeyError:
            # Nothing merged at this path yet.
            pass
        else:
            # Only merge into the existing value when the types line up;
            # a None or scalar cannot receive the incoming items.
            if (isinstance(cur_val, type(fresh_parent))
                    or isinstance(fresh_parent, type(cur_val))):
                new_parent = cur_val
        if isinstance(value, list):
            # lists are purely additive.
            # See https://github.com/mahmoud/boltons/issues/81
            if not isinstance(new_parent, list):
                new_parent = fresh_parent
            new_parent.extend(value)
            new_items = []
        return new_parent, new_items

    for t_name, target in target_list:
        if sourced:
            def remerge_visit(path, key, value):
                source_map[path + (key,)] = t_name
                return True
        else:
            remerge_visit = default_visit

        ret = remap(target, enter=remerge_enter, visit=remerge_visit)

    if not sourced:
        return ret
    return ret, source_map
def main():
    """Merge the three example layers and print the result and its source map."""
    from pprint import pprint

    merged, source_map = remerge(
        [('defaults', defaults),
         ('overlay', overlay),
         ('cache_host_override', cache_host_override)],
        sourced=True)

    # Scalars: the last layer that sets a key wins.
    assert merged['host'] == '127.0.0.1'
    assert merged['port'] == 8080
    # Nested dicts are merged key by key across all three layers.
    assert merged['endpoints']['persistence']['host'] == '10.2.2.2'
    assert merged['endpoints']['persistence']['port'] == 5433
    assert merged['endpoints']['cache']['host'] == '127.0.0.2'
    assert merged['endpoints']['cache']['port'] == 8889
    assert merged['overlay_version'] == '5.0'

    pprint(merged)
    pprint(source_map)
    print(len(source_map), 'paths')


if __name__ == '__main__':
    main()
| {'endpoints': {'cache': {'host': '127.0.0.2', 'port': 8889}, | |
| 'persistence': {'host': '10.2.2.2', 'port': 5433}}, | |
| 'host': '127.0.0.1', | |
| 'notes': ['this is the default', 'this is the overlay'], | |
| 'overlay_version': '5.0', | |
| 'owners': {'primary': ['bob'], 'secondary': ['alice', 'charles']}, | |
| 'port': 8080, | |
| 'zones': [{'a': 1}, {'a': 2}]} | |
| {('endpoints',): 'cache_host_override', | |
| ('endpoints', 'cache'): 'cache_host_override', | |
| ('endpoints', 'cache', 'host'): 'cache_host_override', | |
| ('endpoints', 'cache', 'port'): 'defaults', | |
| ('endpoints', 'persistence'): 'overlay', | |
| ('endpoints', 'persistence', 'host'): 'overlay', | |
| ('endpoints', 'persistence', 'port'): 'overlay', | |
| ('host',): 'overlay', | |
| ('notes',): 'overlay', | |
| ('overlay_version',): 'overlay', | |
| ('owners',): 'overlay', | |
| ('owners', 'primary'): 'overlay', | |
| ('owners', 'secondary'): 'overlay', | |
| ('port',): 'overlay', | |
| ('zones',): 'overlay'} | |
| (15, 'paths') | 
@pleasantone, that is very strange. I updated the gist with the case I tried and the behavior I see. Really not sure how this could have happened.
Oh wait, I see one difference. On Twitter, we'd said a dict with a list in it, but here we have a list with a dict in it. OK, let me try that.
(I got the reproduction, will debug this evening.)
Aaaand fixed. The description, process, etc. is all here in Boltons issue #81.
This is great, thank you!
Hi. This gist seems to work fine. Is it possible to get it into boltons.dictutils?
Here is my humble contribution (merge list flag + unit tests):
# Third Party Libraries
from boltons.iterutils import default_enter
from boltons.iterutils import default_visit
from boltons.iterutils import get_path
from boltons.iterutils import remap
from structlog import get_logger
log = get_logger()
__all__ = ["remerge"]
def remerge(target_list, sourced=False, replace_lists=False):  # noqa: C901
    """Merge a list of containers, later ones taking precedence.

    Takes a list of containers (e.g., dicts) and merges them using
    boltons.iterutils.remap. Containers later in the list take
    precedence (last-wins).

    By default (``replace_lists=False``), list values are not replaced:
    the items of a later list are appended to the list already merged at
    the same path. With ``replace_lists=True`` a later list replaces the
    earlier one (the result holds a copy, never the caller's own list).

    A value already merged at a path is reused as the merge target only
    when its type is compatible with the incoming container. Otherwise
    (e.g. an earlier ``None`` where a later container holds a dict) the
    incoming container replaces it.

    By default, returns a new, merged top-level container.

    With the *sourced* option, `remerge` expects a list of
    (*name*, *container*) pairs, and returns the merged container
    together with a source map: a dictionary mapping between path and
    the name of the container it came from.

    Example:

    .. code-block:: python

        merged, source_map = remerge([('defaults', defaults),
                                      ('overlay', overlay),
                                      ('cache_host_override', cache_host_override),
                                     ],
                                     sourced=True)
    """
    # Discussion in:
    # https://gist.github.com/pleasantone/c99671172d95c3c18ed90dc5435ddd57
    # Final gist in:
    # https://gist.github.com/mahmoud/db02d16ac89fa401b968

    if not sourced:
        # Give every container a throwaway name so one loop serves both modes.
        target_list = [(id(t), t) for t in target_list]

    ret = None
    source_map = {}

    def remerge_enter(path, key, value):
        fresh_parent, new_items = default_enter(path, key, value)
        new_parent = fresh_parent
        if ret and not path and key is None:
            # Root of a later container: keep filling the accumulated result.
            new_parent = ret
        try:
            cur_val = get_path(ret, path + (key, ))
        except KeyError:
            # Nothing merged at this path yet.
            pass
        else:
            # Only merge into the existing value when the types line up;
            # a None or scalar cannot receive the incoming items.
            if (isinstance(cur_val, type(fresh_parent))
                    or isinstance(fresh_parent, type(cur_val))):
                new_parent = cur_val

        if isinstance(value, list):
            if replace_lists or not isinstance(new_parent, list):
                # Start from the empty container made by default_enter so the
                # result never shares a list object with the input.
                new_parent = fresh_parent
            # lists are purely additive unless replaced.
            # See https://github.com/mahmoud/boltons/issues/81
            new_parent.extend(value)
            new_items = []

        return new_parent, new_items

    for t_name, target in target_list:
        if sourced:
            def remerge_visit(path, key, _value):
                source_map[path + (key, )] = t_name  # pylint: disable=cell-var-from-loop
                return True
        else:
            remerge_visit = default_visit

        ret = remap(target, enter=remerge_enter, visit=remerge_visit)

    if not sourced:
        return ret
    return ret, source_map

Unit test:
# coding: utf-8
# Standard Library
from pprint import pprint
# Gitlab Project Configurator Modules
from gpc.helpers.remerge import remerge
def test_override_string():
    """A plain string value is replaced by the value from the later layer."""
    base = dict(key_to_override='value_from_defaults')
    override = dict(key_to_override='value_from_first_override')
    wanted = dict(key_to_override='value_from_first_override')

    # Sourced mode: named layers in, (merged, source map) out.
    layers = [('defaults', base), ('first_override', override)]
    result, provenance = remerge(layers, sourced=True)
    assert result == wanted
    assert provenance == {('key_to_override',): 'first_override'}

    # Unsourced mode: bare containers in, merged container out.
    result = remerge([base, override], sourced=False)
    assert result == wanted
def test_override_subdict():
    """Leaves inside a doubly nested dict are overridden by the later layer."""

    def nested(text, number):
        # Every layer shares the same two-level shape; only the leaves differ.
        return {
            'subdict': {
                'other_subdict': {
                    'key_to_override': text,
                    'integer_to_override': number,
                },
            },
        }

    base = nested('value_from_defaults', 2222)
    override = nested('value_from_first_override', 5555)
    wanted = nested('value_from_first_override', 5555)

    layers = [('defaults', base), ('first_override', override)]
    result, provenance = remerge(layers, sourced=True)
    assert result == wanted

    # Every path, containers included, is attributed to the last layer.
    touched_paths = [
        ('subdict',),
        ('subdict', 'other_subdict'),
        ('subdict', 'other_subdict', 'integer_to_override'),
        ('subdict', 'other_subdict', 'key_to_override'),
    ]
    assert provenance == dict.fromkeys(touched_paths, 'first_override')

    result = remerge([base, override], sourced=False)
    assert result == wanted
def test_override_list_append():
    """By default, a later list is appended to the earlier one."""
    base = {'list_to_append': [{'a': 1}]}
    override = {'list_to_append': [{'b': 1}]}
    wanted = {'list_to_append': [{'a': 1}, {'b': 1}]}

    layers = [('defaults', base), ('first_override', override)]
    result, provenance = remerge(layers, sourced=True)
    assert result == wanted
    assert provenance == {('list_to_append',): 'first_override'}

    # Same merge without provenance tracking.
    result = remerge([base, override], sourced=False)
    assert result == wanted
def test_override_list_replace():
    """With ``replace_lists=True`` a later list replaces the earlier one."""
    base = {'list_to_replace': [{'a': 1}]}
    override = {'list_to_replace': [{'b': 1}]}
    wanted = {'list_to_replace': [{'b': 1}]}

    layers = [('defaults', base), ('first_override', override)]
    result, provenance = remerge(layers, sourced=True, replace_lists=True)
    assert result == wanted
    assert provenance == {('list_to_replace',): 'first_override'}

    # Same merge without provenance tracking.
    result = remerge([base, override], sourced=False, replace_lists=True)
    assert result == wanted
def test_complex_dict():
    """Three layers mixing scalars, a list and nested dicts merge as expected."""

    def leaves(text, number):
        # The pair of leaves repeated at several places in the fixtures.
        return {'key_to_override': text, 'integer_to_override': number}

    base = leaves('value_from_defaults', 1111)
    base['list_to_append'] = [{'a': 1}]
    base['subdict'] = {
        'other_subdict': leaves('value_from_defaults', 2222),
        'second_subdict': leaves('value_from_defaults', 3333),
    }

    middle = leaves('value_from_first_override', 4444)
    middle['list_to_append'] = [{'b': 2}]
    middle['subdict'] = {
        'other_subdict': leaves('value_from_first_override', 5555),
    }
    middle['added_in_first_override'] = 'some_string'

    # The last layer touches one nested leaf only.
    top = {
        'subdict': {
            'second_subdict': {'key_to_override': 'value_from_second_override'},
        },
    }

    layers = [
        ('defaults', base),
        ('first_override', middle),
        ('second_override', top),
    ]
    merged, provenance = remerge(layers, sourced=True)

    print("")
    print("'merged' dictionary:")
    pprint(merged)
    print("")
    pprint(provenance)
    print(len(provenance), 'paths')

    # Top-level scalars come from the middle layer.
    assert merged['key_to_override'] == 'value_from_first_override'
    assert merged['integer_to_override'] == 4444

    # Each nested dict is merged leaf by leaf.
    other = merged['subdict']['other_subdict']
    assert other['key_to_override'] == 'value_from_first_override'
    assert other['integer_to_override'] == 5555
    second = merged['subdict']['second_subdict']
    assert second['key_to_override'] == 'value_from_second_override'
    assert second['integer_to_override'] == 3333

    assert merged['added_in_first_override'] == 'some_string'
    assert merged["list_to_append"] == [{'a': 1}, {'b': 2}]

Thank you very much for this very helpful gift. I'm out of my depth with respect to your code above, but I did bolt remerge onto the cranky machine I'm making with Python. My results were great until I passed it, as the first argument, a dictionary whose keys had values of None. That caused a breakdown. To get the thing running again I only had to create empty dictionaries for those keys first, but I thought you'd like to know that keys having a value of None may need attention.
My problem dictionaries looked something like this:
- {'info': None, 'settings': None}
 - {'info': {'measures': 3, 'clef': 'Treble'}, 'settings': {'format':{....}, 'processing': {....}}}
.
..but I'm not positive I had more than one level of nesting in the second dictionary, short on sleep. 
@gsemet, I wouldn't mind having it in boltons (though probably in iterutils, just for ease of dependence), we could continue the review process there if you want to prepare a PR.
@JoanEliot, that's true, you need the structures to roughly match. Maybe it makes sense for Nones to be overridden, but by that same token, it might make sense to preprocess one side to remove Nones? I could go either way. I'm glad you got it to work!
Thank you for this utility.
I believe that functions that change their return-type based on their inputs,
are hard to work with, even if it's only for debugging aid.
Also needed provenance information for merged lists,
so i did the following changes:
- refact: feed the `source_map` when calling the function, to be populated only if not `None`; now the return type is always the same.
- enh: when merging lists, `source_map` lists all mergers (important when debugging).
- optimize: do not create a new `remerge_visit()` closure on each(!) container to merge, but decide it up-front.
- optimize: don't recreate the input container with a dummy `id()` just to fit the `remerge_visit()` for when `source_map` is asked; actually don't even override the default enter-function when no `source_map` is asked, simply run a simpler loop.
- refact: renamed `target_list` --> `*containers`, so as not to have to create the list-of-containers when calling it.
- refact: combine trivial `else` code with `try-except`.
- Doc: terse sphinx docstring with a doctested example.
- Doc: explain that the input-dicts order is NOT preserved in the results when `source_map` is asked.
- [edit:] dropped the `replace_lists` argument (my apologies, it just wasn't necessary to me, and this was copy-pasted from my project),
def remerge(*containers, source_map: dict = None):
    """
    Merge recursively dicts or lists with :func:`boltons.iterutils.remap()`.

    :param containers:
        a list of dicts or lists to merge; later ones take precedence
        (last-wins).
        If `source_map` is given, these must be 2-tuples of ``(name, container)``.
    :param source_map:
        If given, it must be a dictionary, and `containers` arg must be 2-tuples
        like ``(name, container)``.
        The `source_map` will be populated with mappings between path and the name
        of the container it came from; for list values, which are merged
        additively, the mapped value is the list of all contributing names.

        .. Warning::
            if source_map given, the order of input dictionaries is NOT preserved
            in the results (important if your code relies on PY3.7 stable
            dictionaries).

    :return:
        returns a new, merged top-level container
        (``None`` when no containers are given).

    A value already merged at a path is reused as the merge target only when
    its type is compatible with the incoming container; otherwise (e.g. an
    earlier ``None`` where a later container holds a dict or a list) the
    incoming container replaces it.

    - Adapted from https://gist.github.com/mahmoud/db02d16ac89fa401b968
    - Discussion in: https://gist.github.com/pleasantone/c99671172d95c3c18ed90dc5435ddd57

    **Example**

    >>> defaults = {
    ...     'subdict': {
    ...         'as_is': 'hi',
    ...         'overridden_key1': 'value_from_defaults',
    ...         'merged_list': ['hi', {'untouched_subdict': 'v1'}],
    ...     }
    ... }
    >>> overrides = {
    ...     'subdict': {
    ...         'overridden_key1': 'overridden value',
    ...         'overridden_key2': 5555,
    ...         'merged_list': ['there'],
    ...     }
    ... }
    >>> source_map = {}
    >>> remerge(
    ...     ("defaults", defaults),
    ...     ("overrides", overrides),
    ...     source_map=source_map)
    {'subdict': {'as_is': 'hi',
                 'overridden_key1': 'overridden value',
                 'merged_list': ['hi', {'untouched_subdict': 'v1'}, 'there'],
                 'overridden_key2': 5555}}
    >>> source_map
    {('subdict', 'as_is'): 'defaults',
     ('subdict', 'overridden_key1'): 'overrides',
     ('subdict', 'merged_list'): ['defaults', 'overrides'],
     ('subdict',): 'overrides',
     ('subdict', 'overridden_key2'): 'overrides'}
    """

    ret = None

    def remerge_enter(path, key, value):
        fresh_parent, new_items = default_enter(path, key, value)
        new_parent = fresh_parent
        if ret and not path and key is None:
            # Root of a later container: keep filling the accumulated result.
            new_parent = ret

        try:
            cur_val = get_path(ret, path + (key,))
        except KeyError:
            # Nothing merged at this path yet.
            pass
        else:
            # Only merge into the existing value when the types line up;
            # a None or scalar cannot receive the incoming items.
            if (isinstance(cur_val, type(fresh_parent))
                    or isinstance(fresh_parent, type(cur_val))):
                new_parent = cur_val

        if isinstance(value, list):
            # lists are purely additive. See https://github.com/mahmoud/boltons/issues/81
            if not isinstance(new_parent, list):
                new_parent = fresh_parent
            new_parent.extend(value)
            new_items = []

        return new_parent, new_items

    if source_map is not None:

        def remerge_visit(path, key, value):
            # ``t_name`` is the loop variable of the ``for`` below; it names
            # the container being remapped when this callback runs.
            full_path = path + (key,)
            if isinstance(value, list):
                contributors = source_map.get(full_path)
                if isinstance(contributors, list):
                    contributors.append(t_name)
                else:
                    # First list seen here (or the path held a non-list before).
                    source_map[full_path] = [t_name]
            else:
                source_map[full_path] = t_name
            return True

        for t_name, cont in containers:
            ret = remap(cont, enter=remerge_enter, visit=remerge_visit)
    else:
        for cont in containers:
            ret = remap(cont, enter=remerge_enter)

    return ret

Unit-tests (without replace-list :-()
# coding: utf-8
# Standard Library
from pprint import pprint
# Gitlab Project Configurator Modules
# from gpc.helpers.remerge import remerge
def test_override_string():
    """A plain string value is replaced by the value from the later layer."""
    base = dict(key_to_override="value_from_defaults")
    override = dict(key_to_override="value_from_first_override")
    wanted = dict(key_to_override="value_from_first_override")

    # With a source map: named layers in, provenance filled in place.
    provenance = {}
    result = remerge(
        ("defaults", base), ("first_override", override), source_map=provenance
    )
    assert result == wanted
    assert provenance == {("key_to_override",): "first_override"}

    # Without a source map: bare containers in.
    result = remerge(base, override, source_map=None)
    assert result == wanted
def test_override_subdict():
    """Leaves inside a doubly nested dict are overridden by the later layer."""

    def nested(text, number):
        # Every layer shares the same two-level shape; only the leaves differ.
        return {
            "subdict": {
                "other_subdict": {
                    "key_to_override": text,
                    "integer_to_override": number,
                }
            }
        }

    base = nested("value_from_defaults", 2222)
    override = nested("value_from_first_override", 5555)
    wanted = nested("value_from_first_override", 5555)

    provenance = {}
    result = remerge(
        ("defaults", base), ("first_override", override), source_map=provenance
    )
    assert result == wanted

    # Every path, containers included, is attributed to the last layer.
    touched_paths = [
        ("subdict",),
        ("subdict", "other_subdict"),
        ("subdict", "other_subdict", "integer_to_override"),
        ("subdict", "other_subdict", "key_to_override"),
    ]
    assert provenance == dict.fromkeys(touched_paths, "first_override")

    result = remerge(base, override, source_map=None)
    assert result == wanted
def test_override_list_append():
    """A later list is appended to the earlier one; provenance names both layers."""
    defaults = {"list_to_append": [{"a": 1}]}
    first_override = {"list_to_append": [{"b": 1}]}

    source_map = {}
    merged = remerge(
        ("defaults", defaults),
        ("first_override", first_override),
        source_map=source_map,
    )
    expected_merged = {"list_to_append": [{"a": 1}, {"b": 1}]}
    assert merged == expected_merged
    # For list values this remerge records every contributing container,
    # in merge order, rather than only the last one.
    assert source_map == {("list_to_append",): ["defaults", "first_override"]}

    merged = remerge(defaults, first_override, source_map=None)
    assert merged == expected_merged
def test_complex_dict():
    """Three layers mixing scalars, a list and nested dicts merge as expected."""

    def leaves(text, number):
        # The pair of leaves repeated at several places in the fixtures.
        return {"key_to_override": text, "integer_to_override": number}

    base = leaves("value_from_defaults", 1111)
    base["list_to_append"] = [{"a": 1}]
    base["subdict"] = {
        "other_subdict": leaves("value_from_defaults", 2222),
        "second_subdict": leaves("value_from_defaults", 3333),
    }

    middle = leaves("value_from_first_override", 4444)
    middle["list_to_append"] = [{"b": 2}]
    middle["subdict"] = {
        "other_subdict": leaves("value_from_first_override", 5555),
    }
    middle["added_in_first_override"] = "some_string"

    # The last layer touches one nested leaf only.
    top = {
        "subdict": {
            "second_subdict": {"key_to_override": "value_from_second_override"},
        }
    }

    provenance = {}
    merged = remerge(
        ("defaults", base),
        ("first_override", middle),
        ("second_override", top),
        source_map=provenance,
    )

    print("")
    print("'merged' dictionary:")
    pprint(merged)
    print("")
    pprint(provenance)
    print(len(provenance), "paths")

    # Top-level scalars come from the middle layer.
    assert merged["key_to_override"] == "value_from_first_override"
    assert merged["integer_to_override"] == 4444

    # Each nested dict is merged leaf by leaf.
    other = merged["subdict"]["other_subdict"]
    assert other["key_to_override"] == "value_from_first_override"
    assert other["integer_to_override"] == 5555
    second = merged["subdict"]["second_subdict"]
    assert second["key_to_override"] == "value_from_second_override"
    assert second["integer_to_override"] == 3333

    assert merged["added_in_first_override"] == "some_string"
    assert merged["list_to_append"] == [{"a": 1}, {"b": 2}]

@ankostis Cool! Glad this old gem continues to provide utility. Thanks for sharing :)
Realized the code was a bit inefficient, so i did these two changes on the code above:
- optimize: do not create a new `remerge_visit()` closure on each(!) container to merge, but decide it up-front.
- optimize: don't recreate the input container with a dummy `id()` just to fit the `remerge_visit()` for when `source_map` is asked;
I kept the edited code in the same comment, above, for future reference.
[edit:] but I'm still bugged by the handling of None when extending lists :-(
There is a bug (limitation) in this implementation, it will not work for lists inside your configuration (python 3.5, boltons 16.4.1)
If you have a list of values, remap will create a circular reference:
for example, add
to
`overlay` and you will get the following output: