
@dgobbi
Last active August 7, 2020 17:29

Revisions

  1. dgobbi revised this gist Aug 7, 2020. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions readdump.py
    @@ -257,6 +257,9 @@ def read_dicomdump(lines):
                 instances[instance_tag].append(dicttype([("vr", instance_vr), ("Value", sequence)]))
                 continue

    +        # none of the regular expressions matched!
    +        sys.stderr.write("Unrecognized syntax:\n" + line + "\n")
    +
         # at end of dump, pop back to root
         while len(stack) > 0:
             sequence, dataset = stack[-1]
  2. dgobbi revised this gist Aug 7, 2020. 1 changed file with 18 additions and 0 deletions.
    18 changes: 18 additions & 0 deletions readdump.py
    @@ -49,6 +49,24 @@ def build_value(vr, vl, value):
             value_list = []
             for ptr in value.split('\\'):
                 value_list.append(ptr[1:5]+ptr[6:10])
    +    # for integers, convert to int
    +    elif vr in ['IS', 'SS', 'US', 'SL', 'UL', 'SV', 'UV']:
    +        value_list = []
    +        for v in value.split('\\'):
    +            try:
    +                value_list.append(int(v))
    +            except ValueError:
    +                # TODO: warn
    +                pass
    +    # for decimal, convert to float (inexact)
    +    elif vr in ['DS', 'FL', 'FD']:
    +        value_list = []
    +        for v in value.split('\\'):
    +            try:
    +                value_list.append(float(v))
    +            except ValueError:
    +                # TODO: warn
    +                pass
         # for PN, handle "Alphabetic", "Ideographic", "Phonetic" groups
         elif vr == 'PN':
             value_list = []
  3. dgobbi revised this gist Aug 7, 2020. 1 changed file with 4 additions and 4 deletions.
    8 changes: 4 additions & 4 deletions readdump.py
    @@ -22,9 +22,9 @@

     # regular expressions for parsing dicomdump output
     re_dataset = re.compile("==== (.*) ====")
    -re_data = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\[(.*)\\] [^(]*\\(([^)]*)\\)")
    +re_data = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\[(.*)\\] (\\{[^}]*\\}|) *\\(([^)]*)\\)")
     re_multi = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(multiple values\\)")
    -re_instance = re.compile("( *) 0*([0-9]*) \\[(.*)\\] \\(([^)]*)\\)")
    +re_instance = re.compile("( *) 0*([0-9]*) \\[(.*)\\] (\\{[^}]*\\}|) *\\(([^)]*)\\)")
     re_instance_sq = re.compile("( *) 0*([0-9]*) \\(([0-9]*) item[s]?([^)]*)\\)")
     re_sequence = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(([0-9]*) item[s]?([^)]*)\\)")
     re_item = re.compile("( *)---- SQ Item 0*([0-9]*) at offset ([0-9]*) ----")
    @@ -196,7 +196,7 @@ def read_dicomdump(lines):
                     continue
                 keyword = groups[4]
                 vr = groups[3]
    -            vl = groups[6]
    +            vl = groups[7]
                 value = groups[5]
                 dataset[tag] = build_value(vr, vl, value)
                 continue
    @@ -227,7 +227,7 @@ def read_dicomdump(lines):
                 if skip_tag(instance_tag):
                     continue
                 value = groups[2]
    -            vl = groups[3]
    +            vl = groups[4]
                 instances[instance_tag].append(build_value(instance_vr, vl, value))
                 continue

  4. dgobbi revised this gist Aug 7, 2020. 1 changed file with 5 additions and 5 deletions.
    10 changes: 5 additions & 5 deletions readdump.py
    @@ -22,11 +22,11 @@

     # regular expressions for parsing dicomdump output
     re_dataset = re.compile("==== (.*) ====")
    -re_data = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\[(.*)\\] \\(([0-9]*) bytes\\)")
    +re_data = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\[(.*)\\] [^(]*\\(([^)]*)\\)")
     re_multi = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(multiple values\\)")
    -re_instance = re.compile("( *) 0*([0-9]*) \\[(.*)\\] \\(([0-9]*) bytes\\)")
    -re_instance_sq = re.compile("( *) 0*([0-9]*) \\(([0-9]*) item[s]?(, delimited|)\\)")
    -re_sequence = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(([0-9]*) item[s]?(, delimited|)\\)")
    +re_instance = re.compile("( *) 0*([0-9]*) \\[(.*)\\] \\(([^)]*)\\)")
    +re_instance_sq = re.compile("( *) 0*([0-9]*) \\(([0-9]*) item[s]?([^)]*)\\)")
    +re_sequence = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(([0-9]*) item[s]?([^)]*)\\)")
     re_item = re.compile("( *)---- SQ Item 0*([0-9]*) at offset ([0-9]*) ----")
     re_mismatch = re.compile("( *)VR mismatch! (..|) != (..) (.*)")
     re_indent = re.compile("( *).*")
    @@ -36,7 +36,7 @@ def build_value(vr, vl, value):
         """Convert an attribute value from a dicomdump file to DICOM's json format.
         """
         # if VL is zero, then no value is given
    -    if vl == '0':
    +    if vl == '0 bytes':
             return dicttype([("vr", vr)])
         # if bulk data, use empty BulkDataURI (TODO: InlineBinary)
         if vr in ['OB', 'OD', 'OF', 'OL', 'OV', 'OW']:
  5. dgobbi revised this gist Aug 7, 2020. 1 changed file with 23 additions and 11 deletions.
    34 changes: 23 additions & 11 deletions readdump.py
    @@ -140,17 +140,23 @@ def read_dicomdump(lines):
                 continue

             # check the indentation, which indicates depth
    -        if not (re_instance.match(line) or re_instance_sq.match(line)):
    -            m = re_indent.match(line)
    -            indent = len(m.group(1))/2
    -            # check for extra indentation that isn't in a sequence
    -            if indent > len(stack) and not re_instance.match(line):
    -                sys.stderr.write("Improper indentation:\n" + line + "\n")
    -                continue
    -            # check for decreasing indentation (marks end of a block)
    -            while len(stack) > indent:
    -                sequence, dataset = stack[-1]
    -                stack.pop()
    +        if re_instance.match(line):
    +            # always at the root, depth of zero
    +            depth = 0
    +        elif re_instance_sq.match(line):
    +            # always within a sequence at the root, hence depth is 1
    +            depth = 1
    +        else:
    +            # the depth is given by the indentation
    +            depth = len(re_indent.match(line).group(1))/2
    +        # check for extra indentation that isn't in a sequence
    +        if depth > len(stack):
    +            sys.stderr.write("Improper indentation:\n" + line + "\n")
    +            continue
    +        # check for decreasing indentation (marks end of a block)
    +        while len(stack) > depth:
    +            sequence, dataset = stack[-1]
    +            stack.pop()

             # new dataset (indicated by "====" in the file)
             m = re_dataset.match(line)
    @@ -233,8 +239,14 @@ def read_dicomdump(lines):
                 instances[instance_tag].append(dicttype([("vr", instance_vr), ("Value", sequence)]))
                 continue

    +    # at end of dump, pop back to root
    +    while len(stack) > 0:
    +        sequence, dataset = stack[-1]
    +        stack.pop()
    +
         # change out "multiple value" data elements into a series of datasets
         handle_instances(sequence, instances)

         return sequence


  6. dgobbi revised this gist Aug 7, 2020. 1 changed file with 0 additions and 2 deletions.
    2 changes: 0 additions & 2 deletions readdump.py
    @@ -223,8 +223,6 @@ def read_dicomdump(lines):
                 value = groups[2]
                 vl = groups[3]
                 instances[instance_tag].append(build_value(instance_vr, vl, value))
    -            if len(instances[instance_tag]) == 16:
    -                print("XXX", instance_tag, line)
                 continue

             # one instance of a "multiple value" attribute that is SQ
  7. dgobbi revised this gist Aug 7, 2020. 1 changed file with 14 additions and 14 deletions.
    28 changes: 14 additions & 14 deletions readdump.py
    @@ -40,27 +40,27 @@ def build_value(vr, vl, value):
             return dicttype([("vr", vr)])
         # if bulk data, use empty BulkDataURI (TODO: InlineBinary)
         if vr in ['OB', 'OD', 'OF', 'OL', 'OV', 'OW']:
    -        return dicttype([("vr", vr), ("BulkDataURI", "")])
    +        return dicttype([("vr", vr), ("BulkDataURI", "")])
         # these text VRs are always single-valued
         if vr in ['LT', 'ST', 'UT']:
             value_list = [ value ]
         # for AT, convert dicomdump syntax to DICOM json syntax
         elif vr == 'AT':
    -        value_list = []
    -        for ptr in value.split('\\'):
    -            value_list.append(ptr[1:5]+ptr[6:10])
    +        value_list = []
    +        for ptr in value.split('\\'):
    +            value_list.append(ptr[1:5]+ptr[6:10])
         # for PN, handle "Alphabetic", "Ideographic", "Phonetic" groups
         elif vr == 'PN':
    -        value_list = []
    -        for name in value.split('\\'):
    -            name_attrs = {}
    -            parts = name.split('=')
    -            name_attrs['Alphabetic'] = parts[0]
    -            if len(parts) > 1:
    -                name_attrs['Ideographic'] = parts[1]
    -            if len(parts) > 2:
    -                name_attrs['Phonetic'] = parts[2]
    -            value_list.append(name_attrs)
    +        value_list = []
    +        for name in value.split('\\'):
    +            name_attrs = {}
    +            parts = name.split('=')
    +            name_attrs['Alphabetic'] = parts[0]
    +            if len(parts) > 1:
    +                name_attrs['Ideographic'] = parts[1]
    +            if len(parts) > 2:
    +                name_attrs['Phonetic'] = parts[2]
    +            value_list.append(name_attrs)
         # for all other VRs
         else:
             value_list = value.split('\\')
  8. dgobbi created this gist Aug 7, 2020.
    266 changes: 266 additions & 0 deletions readdump.py
    @@ -0,0 +1,266 @@
    #! /usr/bin/env python

    """
    This program reads the output from "dicomdump" and converts it to json.
    The latest version of this code can be found at gist.github.com/dgobbi
    Note that this code is incomplete, incorrect, and may destroy your data.
    It comes with absolutely no warranties. Use at your own risk.
    """

    import argparse
    import sys
    import re
    import json
    import collections


    # the python dict type we want to use is "OrderedDict"
    dicttype = collections.OrderedDict


    # regular expressions for parsing dicomdump output
    re_dataset = re.compile("==== (.*) ====")
    re_data = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\[(.*)\\] \\(([0-9]*) bytes\\)")
    re_multi = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(multiple values\\)")
    re_instance = re.compile("( *) 0*([0-9]*) \\[(.*)\\] \\(([0-9]*) bytes\\)")
    re_instance_sq = re.compile("( *) 0*([0-9]*) \\(([0-9]*) item[s]?(, delimited|)\\)")
    re_sequence = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(([0-9]*) item[s]?(, delimited|)\\)")
    re_item = re.compile("( *)---- SQ Item 0*([0-9]*) at offset ([0-9]*) ----")
    re_mismatch = re.compile("( *)VR mismatch! (..|) != (..) (.*)")
    re_indent = re.compile("( *).*")


    def build_value(vr, vl, value):
        """Convert an attribute value from a dicomdump file to DICOM's json format.
        """
        # if VL is zero, then no value is given
        if vl == '0':
            return dicttype([("vr", vr)])
        # if bulk data, use empty BulkDataURI (TODO: InlineBinary)
        if vr in ['OB', 'OD', 'OF', 'OL', 'OV', 'OW']:
            return dicttype([("vr", vr), ("BulkDataURI", "")])
        # these text VRs are always single-valued
        if vr in ['LT', 'ST', 'UT']:
            value_list = [ value ]
        # for AT, convert dicomdump syntax to DICOM json syntax
        elif vr == 'AT':
            value_list = []
            for ptr in value.split('\\'):
                value_list.append(ptr[1:5]+ptr[6:10])
        # for PN, handle "Alphabetic", "Ideographic", "Phonetic" groups
        elif vr == 'PN':
            value_list = []
            for name in value.split('\\'):
                name_attrs = {}
                parts = name.split('=')
                name_attrs['Alphabetic'] = parts[0]
                if len(parts) > 1:
                    name_attrs['Ideographic'] = parts[1]
                if len(parts) > 2:
                    name_attrs['Phonetic'] = parts[2]
                value_list.append(name_attrs)
        # for all other VRs
        else:
            value_list = value.split('\\')
        # replace any empty values with null
        for i in range(len(value_list)):
            if value_list[i] == "":
                value_list[i] = None
        return dicttype([("vr", vr), ("Value", value_list)])


    def handle_instances(sequence, instances):
        """Handle "multiple values" by recreating multiple datasets.
        """
        if instances:
            # make copies of last sequence
            last_dataset = sequence[-1]
            sequence.pop()
            n = 0
            for tag in instances:
                n = max(n, len(instances[tag]))
            for i in range(n):
                dataset = dicttype(last_dataset)
                for tag in instances:
                    try:
                        dataset[tag] = instances[tag][i]
                    except IndexError:
                        # TODO: warning
                        pass
                sequence.append(dataset)
            instances.clear()


    def skip_tag(tag):
        """Returns True for tags that should be skipped.
        """
        # group length tags
        if tag[-4:] == '0000':
            return True
        # tags in group 0002, 0004, etc
        elif tag[0:4] < '0008':
            return True
        return False


    def read_dicomdump(lines):
        """Parse a dicomdump file that has been read with "readlines".
        """
        # a sequence of datasets will be read (usually just one)
        sequence = []
        dataset = None

        # a stack is needed for handling the depth of the tree
        stack = []

        # for dicomdump's "multiple values" across a series
        instance_tag = None
        instance_vr = None
        instances = {}

        # for dealing with a bug in dicomdump for series where
        # the first dataset is missing elements
        vr_mismatch = ("", "")

        # go through the dump line-by-line
        for line in lines:
            line = line.rstrip()

            # empty line: ignore
            if len(line) == 0:
                continue

            # mismatched VR warning: ignore
            m = re_mismatch.match(line)
            if m:
                groups = m.groups()
                vr_mismatch = (groups[1], groups[2])
                continue

            # check the indentation, which indicates depth
            if not (re_instance.match(line) or re_instance_sq.match(line)):
                m = re_indent.match(line)
                indent = len(m.group(1))/2
                # check for extra indentation that isn't in a sequence
                if indent > len(stack) and not re_instance.match(line):
                    sys.stderr.write("Improper indentation:\n" + line + "\n")
                    continue
                # check for decreasing indentation (marks end of a block)
                while len(stack) > indent:
                    sequence, dataset = stack[-1]
                    stack.pop()

            # new dataset (indicated by "====" in the file)
            m = re_dataset.match(line)
            if m:
                handle_instances(sequence, instances)
                # start a fresh dataset
                dataset = dicttype()
                sequence.append(dataset)
                continue

            # new item (indicated by "----" in the file)
            m = re_item.match(line)
            if m:
                dataset = dicttype()
                sequence.append(dataset)
                continue

            # sequence value (increase depth)
            m = re_sequence.match(line)
            if m:
                groups = m.groups()
                tag = "".join(groups[1:3]).upper()
                vr = groups[3]
                stack.append((sequence, dataset))
                sequence = []
                dataset[tag] = dicttype([("vr", vr), ("Value", sequence)])
                dataset = None
                continue

            # any other value
            m = re_data.match(line)
            if m:
                groups = m.groups()
                tag = "".join(groups[1:3]).upper()
                # skip group length tags
                if skip_tag(tag):
                    continue
                keyword = groups[4]
                vr = groups[3]
                vl = groups[6]
                value = groups[5]
                dataset[tag] = build_value(vr, vl, value)
                continue

            # ----
            # special code for the dicomdump "multiple values" lines
            m = re_multi.match(line)
            if m:
                groups = m.groups()
                instance_tag = "".join(groups[1:3]).upper()
                instance_vr = groups[3]
                if instance_vr == "" and vr_mismatch[0] == "":
                    instance_vr = vr_mismatch[1]
                # skip group length tags
                if skip_tag(instance_tag):
                    continue
                dataset[instance_tag] = dicttype([("vr", instance_vr)])
                instances[instance_tag] = []
                if instance_vr == 'SQ':
                    stack.append((sequence, dataset))
                continue

            # one instance of a "multiple value" attribute
            m = re_instance.match(line)
            if m:
                groups = m.groups()
                # skip group length tags
                if skip_tag(instance_tag):
                    continue
                value = groups[2]
                vl = groups[3]
                instances[instance_tag].append(build_value(instance_vr, vl, value))
                if len(instances[instance_tag]) == 16:
                    print("XXX", instance_tag, line)
                continue

            # one instance of a "multiple value" attribute that is SQ
            m = re_instance_sq.match(line)
            if m:
                sequence = []
                dataset = None
                instances[instance_tag].append(dicttype([("vr", instance_vr), ("Value", sequence)]))
                continue

        # change out "multiple value" data elements into a series of datasets
        handle_instances(sequence, instances)
        return sequence


    def main():
        parser = argparse.ArgumentParser(description="Read dicomdump output.")
        parser.add_argument('input', help="Input file (DICOM).")
        parser.add_argument('-o', '--output', required=False,
                            help="Output file (json).")
        args = parser.parse_args()

        with open(args.input) as f:
            tree = read_dicomdump(f.readlines())

        json_opts = {
            "indent" : 2,
            "separators" : (",", " : "),
        }

        if args.output:
            with open(args.output, 'w') as f:
                json.dump(tree, f, **json_opts)
        else:
            json.dump(tree, sys.stdout, **json_opts)


    if __name__ == '__main__':
        main()
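
For quick testing, here is a minimal usage sketch that is not part of the gist. It assumes the gist file is saved as readdump.py on the import path, and the sample lines are hypothetical examples of the (gggg,eeee) VR "Keyword" : [value] (N bytes) syntax that re_data expects; real dicomdump output may differ in its details.

    # Minimal usage sketch; the sample dump lines below are hypothetical
    # and stand in for actual dicomdump output.
    import json
    from readdump import read_dicomdump  # assumes the gist is saved as readdump.py

    sample_dump = [
        '==== example.dcm ====',
        '(0008,0060) CS "Modality" : [MR] (2 bytes)',
        '(0010,0010) PN "PatientName" : [DOE^JOHN] (8 bytes)',
    ]

    # parse the dump and print the resulting tree as DICOM-style json
    tree = read_dicomdump(sample_dump)
    print(json.dumps(tree, indent=2))

With input like this, read_dicomdump should return a one-element list whose dataset contains the 00080060 and 00100010 entries, each as a dict with "vr" and "Value" keys.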