Last active: August 7, 2020 17:29
Revisions
dgobbi revised this gist on Aug 7, 2020. 1 changed file with 3 additions and 0 deletions.

@@ -257,6 +257,9 @@ def read_dicomdump(lines):
            instances[instance_tag].append(dicttype([("vr", instance_vr),
                                                     ("Value", sequence)]))
            continue
        # none of the regular expressions matched!
        sys.stderr.write("Unrecognized syntax:\n" + line + "\n")

    # at end of dump, pop back to root
    while len(stack) > 0:
        sequence, dataset = stack[-1]
dgobbi revised this gist on Aug 7, 2020. 1 changed file with 18 additions and 0 deletions.

@@ -49,6 +49,24 @@ def build_value(vr, vl, value):
        value_list = []
        for ptr in value.split('\\'):
            value_list.append(ptr[1:5]+ptr[6:10])
    # for integers, convert to int
    elif vr in ['IS', 'SS', 'US', 'SL', 'UL', 'SV', 'UV']:
        value_list = []
        for v in value.split('\\'):
            try:
                value_list.append(int(v))
            except ValueError:
                # TODO: warn
                pass
    # for decimal, convert to float (inexact)
    elif vr in ['DS', 'FL', 'FD']:
        value_list = []
        for v in value.split('\\'):
            try:
                value_list.append(float(v))
            except ValueError:
                # TODO: warn
                pass
    # for PN, handle "Alphabetic", "Ideographic", "Phonetic" groups
    elif vr == 'PN':
        value_list = []
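For reference, the conversion added in this revision turns backslash-separated numeric strings into JSON numbers. A minimal standalone sketch of that behaviour (not part of the gist itself):

def to_numeric(vr, value):
    # integer VRs get int(), decimal VRs get float(); bad tokens are dropped,
    # mirroring the gist's "TODO: warn" placeholder
    conv = int if vr in ['IS', 'SS', 'US', 'SL', 'UL', 'SV', 'UV'] else float
    result = []
    for v in value.split('\\'):
        try:
            result.append(conv(v))
        except ValueError:
            pass
    return result

print(to_numeric('IS', '1\\2\\3'))    # [1, 2, 3]
print(to_numeric('DS', '0.5\\1.25'))  # [0.5, 1.25]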
dgobbi revised this gist on Aug 7, 2020. 1 changed file with 4 additions and 4 deletions.

@@ -22,9 +22,9 @@
# regular expressions for parsing dicomdump output
re_dataset = re.compile("==== (.*) ====")
re_data = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\[(.*)\\] (\\{[^}]*\\}|) *\\(([^)]*)\\)")
re_multi = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(multiple values\\)")
re_instance = re.compile("( *) 0*([0-9]*) \\[(.*)\\] (\\{[^}]*\\}|) *\\(([^)]*)\\)")
re_instance_sq = re.compile("( *) 0*([0-9]*) \\(([0-9]*) item[s]?([^)]*)\\)")
re_sequence = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(([0-9]*) item[s]?([^)]*)\\)")
re_item = re.compile("( *)---- SQ Item 0*([0-9]*) at offset ([0-9]*) ----")

@@ -196,7 +196,7 @@ def read_dicomdump(lines):
                continue
            keyword = groups[4]
            vr = groups[3]
            vl = groups[7]
            value = groups[5]
            dataset[tag] = build_value(vr, vl, value)
            continue

@@ -227,7 +227,7 @@ def read_dicomdump(lines):
            if skip_tag(instance_tag):
                continue
            value = groups[2]
            vl = groups[4]
            instances[instance_tag].append(build_value(instance_vr, vl, value))
            continue
dgobbi revised this gist on Aug 7, 2020. 1 changed file with 5 additions and 5 deletions.

@@ -22,11 +22,11 @@
# regular expressions for parsing dicomdump output
re_dataset = re.compile("==== (.*) ====")
re_data = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\[(.*)\\] [^(]*\\(([^)]*)\\)")
re_multi = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(multiple values\\)")
re_instance = re.compile("( *) 0*([0-9]*) \\[(.*)\\] \\(([^)]*)\\)")
re_instance_sq = re.compile("( *) 0*([0-9]*) \\(([0-9]*) item[s]?([^)]*)\\)")
re_sequence = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(([0-9]*) item[s]?([^)]*)\\)")
re_item = re.compile("( *)---- SQ Item 0*([0-9]*) at offset ([0-9]*) ----")
re_mismatch = re.compile("( *)VR mismatch! (..|) != (..) (.*)")
re_indent = re.compile("( *).*")

@@ -36,7 +36,7 @@ def build_value(vr, vl, value):
    """Convert an attribute value from a dicomdump file to DICOM's
    json format.
    """
    # if VL is zero, then no value is given
    if vl == '0 bytes':
        return dicttype([("vr", vr)])
    # if bulk data, use empty BulkDataURI (TODO: InlineBinary)
    if vr in ['OB', 'OD', 'OF', 'OL', 'OV', 'OW']:
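For reference, this revision makes the regexes capture the whole length annotation (for example "8 bytes") rather than just the digit count, which is why build_value now compares against '0 bytes'. An illustrative match against a made-up dump line (the sample line is an assumption, not taken from the gist):

import re

re_data = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\[(.*)\\] [^(]*\\(([^)]*)\\)")

line = '(0010,0010) PN "PatientName" : [Doe^John] (8 bytes)'
m = re_data.match(line)
print(m.group(2), m.group(3))  # 0010 0010  (tag)
print(m.group(4))              # PN         (VR)
print(m.group(6))              # Doe^John   (value)
print(m.group(7))              # 8 bytes    (VL, now captured as text)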
dgobbi revised this gist on Aug 7, 2020. 1 changed file with 23 additions and 11 deletions.

@@ -140,17 +140,23 @@ def read_dicomdump(lines):
            continue
        # check the indentation, which indicates depth
        if re_instance.match(line):
            # always at the root, depth of zero
            depth = 0
        elif re_instance_sq.match(line):
            # always within a sequence at the root, hence depth is 1
            depth = 1
        else:
            # the depth is given by the indentation
            depth = len(re_indent.match(line).group(1))/2
        # check for extra indentation that isn't in a sequence
        if depth > len(stack):
            sys.stderr.write("Improper indentation:\n" + line + "\n")
            continue
        # check for decreasing indentation (marks end of a block)
        while len(stack) > depth:
            sequence, dataset = stack[-1]
            stack.pop()
        # new dataset (indicated by "====" in the file)
        m = re_dataset.match(line)

@@ -233,8 +239,14 @@ def read_dicomdump(lines):
            instances[instance_tag].append(dicttype([("vr", instance_vr),
                                                     ("Value", sequence)]))
            continue

    # at end of dump, pop back to root
    while len(stack) > 0:
        sequence, dataset = stack[-1]
        stack.pop()

    # change out "multiple value" data elements into a series of datasets
    handle_instances(sequence, instances)

    return sequence
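For reference, the depth logic above assumes dicomdump indents nested content by two spaces per level. A small standalone sketch with made-up dump lines (shaped to match the gist's regexes, not copied from real output):

import re

re_indent = re.compile("( *).*")

sample = [
    '(0008,1115) SQ "ReferencedSeriesSequence" : (1 items)',             # depth 0
    '  ---- SQ Item 0001 at offset 0 ----',                              # depth 1
    '  (0020,000E) UI "SeriesInstanceUID" : [1.2.840.10008.1] (16 bytes)',  # depth 1
]
for line in sample:
    # each nesting level adds two spaces of indentation
    depth = len(re_indent.match(line).group(1)) // 2
    print(depth, line.strip())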
dgobbi revised this gist on Aug 7, 2020. 1 changed file with 0 additions and 2 deletions.

@@ -223,8 +223,6 @@ def read_dicomdump(lines):
            value = groups[2]
            vl = groups[3]
            instances[instance_tag].append(build_value(instance_vr, vl, value))
            continue
        # one instance of a "multiple value" attribute that is SQ
dgobbi revised this gist on Aug 7, 2020. 1 changed file with 14 additions and 14 deletions.

@@ -40,27 +40,27 @@ def build_value(vr, vl, value):
        return dicttype([("vr", vr)])
    # if bulk data, use empty BulkDataURI (TODO: InlineBinary)
    if vr in ['OB', 'OD', 'OF', 'OL', 'OV', 'OW']:
        return dicttype([("vr", vr), ("BulkDataURI", "")])
    # these text VRs are always single-valued
    if vr in ['LT', 'ST', 'UT']:
        value_list = [ value ]
    # for AT, convert dicomdump syntax to DICOM json syntax
    elif vr == 'AT':
        value_list = []
        for ptr in value.split('\\'):
            value_list.append(ptr[1:5]+ptr[6:10])
    # for PN, handle "Alphabetic", "Ideographic", "Phonetic" groups
    elif vr == 'PN':
        value_list = []
        for name in value.split('\\'):
            name_attrs = {}
            parts = name.split('=')
            name_attrs['Alphabetic'] = parts[0]
            if len(parts) > 1:
                name_attrs['Ideographic'] = parts[1]
            if len(parts) > 2:
                name_attrs['Phonetic'] = parts[2]
            value_list.append(name_attrs)
    # for all other VRs
    else:
        value_list = value.split('\\')
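For reference, a standalone sketch of the PN mapping shown in this revision, using the multi-group person name example from the DICOM standard; each '='-separated component group becomes one key of the DICOM JSON person-name object:

def pn_to_json(name):
    # split the dump value into its Alphabetic/Ideographic/Phonetic groups
    keys = ['Alphabetic', 'Ideographic', 'Phonetic']
    return {k: part for k, part in zip(keys, name.split('='))}

print(pn_to_json('Yamada^Tarou=山田^太郎=やまだ^たろう'))
# {'Alphabetic': 'Yamada^Tarou', 'Ideographic': '山田^太郎', 'Phonetic': 'やまだ^たろう'}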
dgobbi created this gist on Aug 7, 2020.

@@ -0,0 +1,266 @@
#! /usr/bin/env python
"""
This program reads the output from "dicomdump" and converts it to json.
The latest version of this code can be found at gist.github.com/dgobbi

Note that this code is incomplete, incorrect, and may destroy your data.
It comes with absolutely no warranties. Use at your own risk.
"""

import argparse
import sys
import re
import json
import collections

# the python dict type we want to use is "OrderedDict"
dicttype = collections.OrderedDict

# regular expressions for parsing dicomdump output
re_dataset = re.compile("==== (.*) ====")
re_data = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\[(.*)\\] \\(([0-9]*) bytes\\)")
re_multi = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(multiple values\\)")
re_instance = re.compile("( *) 0*([0-9]*) \\[(.*)\\] \\(([0-9]*) bytes\\)")
re_instance_sq = re.compile("( *) 0*([0-9]*) \\(([0-9]*) item[s]?(, delimited|)\\)")
re_sequence = re.compile("( *)\\((....),(....)\\) (..|) \"([^\"]*)\" : \\(([0-9]*) item[s]?(, delimited|)\\)")
re_item = re.compile("( *)---- SQ Item 0*([0-9]*) at offset ([0-9]*) ----")
re_mismatch = re.compile("( *)VR mismatch! (..|) != (..) (.*)")
re_indent = re.compile("( *).*")


def build_value(vr, vl, value):
    """Convert an attribute value from a dicomdump file to DICOM's
    json format.
    """
    # if VL is zero, then no value is given
    if vl == '0':
        return dicttype([("vr", vr)])
    # if bulk data, use empty BulkDataURI (TODO: InlineBinary)
    if vr in ['OB', 'OD', 'OF', 'OL', 'OV', 'OW']:
        return dicttype([("vr", vr), ("BulkDataURI", "")])
    # these text VRs are always single-valued
    if vr in ['LT', 'ST', 'UT']:
        value_list = [ value ]
    # for AT, convert dicomdump syntax to DICOM json syntax
    elif vr == 'AT':
        value_list = []
        for ptr in value.split('\\'):
            value_list.append(ptr[1:5]+ptr[6:10])
    # for PN, handle "Alphabetic", "Ideographic", "Phonetic" groups
    elif vr == 'PN':
        value_list = []
        for name in value.split('\\'):
            name_attrs = {}
            parts = name.split('=')
            name_attrs['Alphabetic'] = parts[0]
            if len(parts) > 1:
                name_attrs['Ideographic'] = parts[1]
            if len(parts) > 2:
                name_attrs['Phonetic'] = parts[2]
            value_list.append(name_attrs)
    # for all other VRs
    else:
        value_list = value.split('\\')
    # replace any empty values with null
    for i in range(len(value_list)):
        if value_list[i] == "":
            value_list[i] = None
    return dicttype([("vr", vr), ("Value", value_list)])


def handle_instances(sequence, instances):
    """Handle "multiple values" by recreating multiple datasets.
    """
    if instances:
        # make copies of last sequence
        last_dataset = sequence[-1]
        sequence.pop()
        n = 0
        for tag in instances:
            n = max(n, len(instances[tag]))
        for i in range(n):
            dataset = dicttype(last_dataset)
            for tag in instances:
                try:
                    dataset[tag] = instances[tag][i]
                except IndexError:
                    # TODO: warning
                    pass
            sequence.append(dataset)
        instances.clear()


def skip_tag(tag):
    """Returns True for tags that should be skipped.
    """
    # group length tags
    if tag[-4:] == '0000':
        return True
    # tags in group 0002, 0004, etc
    elif tag[0:4] < '0008':
        return True
    return False


def read_dicomdump(lines):
    """Parse a dicomdump file that has been read with "readlines".
    """
    # a sequence of datasets will be read (usually just one)
    sequence = []
    dataset = None
    # a stack is needed for handling the depth of the tree
    stack = []
    # for dicomdump's "multiple values" across a series
    instance_tag = None
    instance_vr = None
    instances = {}
    # for dealing with a bug in dicomdump for series where
    # the first dataset is missing elements
    vr_mismatch = ("", "")
    # go through the dump line-by-line
    for line in lines:
        line = line.rstrip()
        # empty line: ignore
        if len(line) == 0:
            continue
        # mismatched VR warning: ignore
        m = re_mismatch.match(line)
        if m:
            groups = m.groups()
            vr_mismatch = (groups[1], groups[2])
            continue
        # check the indentation, which indicates depth
        if not (re_instance.match(line) or re_instance_sq.match(line)):
            m = re_indent.match(line)
            indent = len(m.group(1))/2
            # check for extra indentation that isn't in a sequence
            if indent > len(stack) and not re_instance.match(line):
                sys.stderr.write("Improper indentation:\n" + line + "\n")
                continue
            # check for decreasing indentation (marks end of a block)
            while len(stack) > indent:
                sequence, dataset = stack[-1]
                stack.pop()
        # new dataset (indicated by "====" in the file)
        m = re_dataset.match(line)
        if m:
            handle_instances(sequence, instances)
            # start a fresh dataset
            dataset = dicttype()
            sequence.append(dataset)
            continue
        # new item (indicated by "----" in the file)
        m = re_item.match(line)
        if m:
            dataset = dicttype()
            sequence.append(dataset)
            continue
        # sequence value (increase depth)
        m = re_sequence.match(line)
        if m:
            groups = m.groups()
            tag = "".join(groups[1:3]).upper()
            vr = groups[3]
            stack.append((sequence, dataset))
            sequence = []
            dataset[tag] = dicttype([("vr", vr), ("Value", sequence)])
            dataset = None
            continue
        # any other value
        m = re_data.match(line)
        if m:
            groups = m.groups()
            tag = "".join(groups[1:3]).upper()
            # skip group length tags
            if skip_tag(tag):
                continue
            keyword = groups[4]
            vr = groups[3]
            vl = groups[6]
            value = groups[5]
            dataset[tag] = build_value(vr, vl, value)
            continue
        # ----
        # special code for the dicomdump "multiple values" lines
        m = re_multi.match(line)
        if m:
            groups = m.groups()
            instance_tag = "".join(groups[1:3]).upper()
            instance_vr = groups[3]
            if instance_vr == "" and vr_mismatch[0] == "":
                instance_vr = vr_mismatch[1]
            # skip group length tags
            if skip_tag(instance_tag):
                continue
            dataset[instance_tag] = dicttype([("vr", instance_vr)])
            instances[instance_tag] = []
            if instance_vr == 'SQ':
                stack.append((sequence, dataset))
            continue
        # one instance of a "multiple value" attribute
        m = re_instance.match(line)
        if m:
            groups = m.groups()
            # skip group length tags
            if skip_tag(instance_tag):
                continue
            value = groups[2]
            vl = groups[3]
            instances[instance_tag].append(build_value(instance_vr, vl, value))
            if len(instances[instance_tag]) == 16:
                print("XXX", instance_tag, line)
            continue
        # one instance of a "multiple value" attribute that is SQ
        m = re_instance_sq.match(line)
        if m:
            sequence = []
            dataset = None
            instances[instance_tag].append(dicttype([("vr", instance_vr),
                                                     ("Value", sequence)]))
            continue

    # change out "multiple value" data elements into a series of datasets
    handle_instances(sequence, instances)

    return sequence


def main():
    parser = argparse.ArgumentParser(description="Read dicomdump output.")
    parser.add_argument('input', help="Input file (DICOM).")
    parser.add_argument('-o', '--output', required=False,
                        help="Output file (json).")
    args = parser.parse_args()

    with open(args.input) as f:
        tree = read_dicomdump(f.readlines())

    json_opts = {
        "indent" : 2,
        "separators" : (",", " : "),
    }

    if args.output:
        with open(args.output, 'w') as f:
            json.dump(tree, f, **json_opts)
    else:
        json.dump(tree, sys.stdout, **json_opts)


if __name__ == '__main__':
    main()
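For reference, a usage sketch, not part of the gist: the file names and the module name dicomdump2json are assumptions. First produce a text dump with the dicomdump utility, then convert it; running "python dicomdump2json.py dump.txt -o image.json" is the equivalent command-line form given the argparse options above.

# Hypothetical usage; assumes the script above was saved as dicomdump2json.py
# and that a dump was produced beforehand, e.g. "dicomdump image.dcm > dump.txt".
import json
from dicomdump2json import read_dicomdump  # assumed module name

with open("dump.txt") as f:             # made-up file name
    tree = read_dicomdump(f.readlines())

with open("image.json", "w") as f:      # made-up file name
    json.dump(tree, f, indent=2)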