Skip to content

Instantly share code, notes, and snippets.

@SamWolski
Created May 19, 2023 18:36
Show Gist options
  • Select an option

  • Save SamWolski/a276902f6d1ebfc08b95c4ef6ac009a6 to your computer and use it in GitHub Desktop.

Select an option

Save SamWolski/a276902f6d1ebfc08b95c4ef6ac009a6 to your computer and use it in GitHub Desktop.

Revisions

  1. SamWolski created this gist May 19, 2023.
    254 changes: 254 additions & 0 deletions text2wkt.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,254 @@
    '''
    Convert text in any installed font to the WKT format.
    Requires inkscape.
    Does not currently work for characters with disjoint top-level islands (eg "=",
    double quotes, capital Xi, etc.)
    '''

    import json
    import subprocess
    import xml.etree.ElementTree as etree

    import shapely
    from shapely import affinity
    from svgpath2mpl import parse_path
    import svgwrite

    ###############################################################################

    class GeoTree:
        '''Generic tree node wrapping a geometry object.

        Every node takes a sequential integer ``id`` from the class-level counter
        ``tree_id``, stores the ``geometry`` it was given, and keeps an ordered
        list of ``children`` (each of which must itself be a GeoTree).
        '''
        ## Class-wide counter: the id that the next created node will receive
        tree_id = 0

        def __init__(self, geometry, children=None):
            ## Claim the next free id and advance the shared counter
            self.id = GeoTree.tree_id
            GeoTree.tree_id += 1
            self.geometry = geometry
            self.children = []
            ## Initial children go through add_child so they get type-checked
            if children is None:
                children = ()
            for node in children:
                self.add_child(node)

        def __repr__(self):
            return f"<Tree{self.id}>"

        def add_child(self, node):
            '''Append node (which must be a GeoTree) to this node's children.'''
            assert isinstance(node, GeoTree)
            self.children.append(node)


    def tree_contains(tree1, tree2):
        '''Report whether tree1's geometry contains tree2's geometry.

        Returns False without consulting shapely when either argument is falsy
        (eg None); otherwise returns the result of shapely.contains applied to
        the two nodes' ``geometry`` attributes.
        '''
        ## Guard clause: nothing to compare if either side is missing
        if not tree1 or not tree2:
            return False
        outer_geo = tree1.geometry
        inner_geo = tree2.geometry
        return shapely.contains(outer_geo, inner_geo)


    def recursive_order(current, new, order_fn=tree_contains):
        '''Insert a new GeoTree node into the hierarchy rooted at current.

        This function is recursive, so be careful!

        current:  root node of the (sub)tree to insert into.
        new:      node to insert.
        order_fn: callable(a, b) returning a truthy value when a contains b. It
                  is forwarded unchanged to every recursive call.

        Returns the root of the resulting (sub)tree:
          - current, if new was placed somewhere below current;
          - new, if new contains current (current becomes a child of new);
          - None, if neither contains the other (nothing is modified).
        '''
        if order_fn(current, new):
            ## New is contained within current: it is either a direct child or
            ## belongs further down one of the existing branches.
            ## Iterate over a snapshot of the children, because displaced
            ## children are removed from current.children inside the loop and
            ## mutating the list being iterated would skip the next sibling.
            for child in list(current.children):
                result = recursive_order(child, new, order_fn)
                if result is child:
                    ## The child contains new; the recursive call has already
                    ## placed it, so the current root is unchanged
                    return current
                if result is new:
                    ## New contains this child and has already adopted it in
                    ## the recursive call, so detach it from current.
                    ## We can't exit here because more children may belong
                    ## to new.
                    current.children.remove(child)
                ## A result of None means no containment: the child stays put
            ## No child contains new, so it becomes a direct child of current,
            ## carrying with it any children it displaced above
            current.add_child(new)
            return current
        if order_fn(new, current):
            ## Current is contained within new, which becomes the new root
            new.add_child(current)
            return new
        ## No containment either way between current and new
        return None


    def recursive_collapse(tree_root, collapse_fn=shapely.difference):
        '''Fold a GeoTree into a single geometry, working from the leaves up.

        tree_root:   GeoTree node whose subtree should be collapsed.
        collapse_fn: callable(parent_geo, child_geo) combining a node's geometry
                     with the already-collapsed geometry of one child.

        Returns the combined geometry (whatever collapse_fn yields), NOT a
        GeoTree object! A node without children yields its own geometry
        unchanged.
        '''
        ## Start from this node's own geometry and fold in each child in turn
        merged = tree_root.geometry
        for branch in tree_root.children:
            ## Collapse the child's subtree first, then combine it with what
            ## has been accumulated so far
            branch_geo = recursive_collapse(branch, collapse_fn)
            merged = collapse_fn(merged, branch_geo)
        return merged


    ###############################################################################

    ## Corrective transformation constants
    ## NB It has not been verified that these values hold for all fonts.
    ## Y_BASELINE is the y offset applied to each glyph before scaling.
    Y_BASELINE = -3.75
    ## SHRINK_FACT is the scale factor applied on x and (negated) on y.
    ## Presumably the 10 matches the font-size of 10 used when rendering the
    ## text -- TODO confirm.
    SHRINK_FACT = 1.0 / (10 + Y_BASELINE)


    def _collapse_glyph(char_geos):
        '''Merge the contour polygons of one glyph into a single geometry.

        The contours are arranged into containment trees with recursive_order
        (outline -> hole -> island -> ...). A contour that neither contains nor
        is contained by any existing root starts a new tree, so glyphs made of
        disjoint top-level islands (eg "=", double quotes) are handled instead
        of being retried forever. Each tree is collapsed with
        recursive_collapse; if there are several trees their geometries are
        combined with shapely.union_all.
        '''
        roots = []
        for geo in char_geos:
            node = GeoTree(geo)
            nested = False
            ## Iterate over a snapshot since displaced roots are removed below
            for root in list(roots):
                result = recursive_order(root, node)
                if result is root:
                    ## The node was placed somewhere inside this tree
                    nested = True
                    break
                if result is node:
                    ## The node contains this root and has adopted it as a
                    ## child; keep checking as it may contain other roots too
                    roots.remove(root)
                ## A result of None means no containment: try the next root
            if not nested:
                roots.append(node)

        ## Collapse every tree by subtracting contained geometries
        islands = [recursive_collapse(root) for root in roots]
        ## A single tree is returned as-is; only genuinely disjoint islands
        ## need to be combined
        if len(islands) == 1:
            return islands[0]
        return shapely.union_all(islands)


    def text2wkt(font_family,
                 input_chars="1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                 ):
        '''Convert characters rendered in an installed font to WKT strings.

        Each character is written to an SVG file as text, converted to a path
        by the inkscape command-line tool, parsed into shapely polygons, merged
        into one geometry, and finally shifted/flipped/scaled using Y_BASELINE
        and SHRINK_FACT.

        font_family: name of the font family to render with.
        input_chars: iterable of characters to convert; repeated characters are
                     converted only once.

        Returns a dict mapping each character to the WKT string of its
        geometry.

        Raises subprocess.CalledProcessError if inkscape exits with a non-zero
        status, and ValueError if the converted SVG contains no path for a
        character.
        '''
        ## Local imports keep this function self-contained
        import os
        import tempfile

        ## Container for storing output WKT strings
        wkt_dict = {}

        ## Work inside a private temporary directory so that nothing is left
        ## behind in (or clobbered in) the current working directory
        with tempfile.TemporaryDirectory() as tmp_dir:
            svg_file = os.path.join(tmp_dir, "_temp.svg")

            for char in input_chars:

                ## The conversion is deterministic, so skip repeated characters
                if char in wkt_dict:
                    continue

                ## Write char to svg
                ####################

                dwg = svgwrite.Drawing(filename=svg_file)
                gen_elem = dwg.g(
                    style=f"font-size:10;font-family:{font_family}")
                gen_elem.add(dwg.text(char, insert=(0, 10)))
                dwg.add(gen_elem)
                ## TODO this can be done with pipes without writing to a temp
                ## file!
                dwg.save()

                ## Convert svg text elements to path
                ####################################

                ## The file is converted in place; check=True surfaces inkscape
                ## failures here instead of as a confusing error further down
                subprocess.run(["inkscape", svg_file, "-T", "-l",
                                "--export-filename", svg_file],
                               check=True)

                ## Convert svg path to shapely geometry
                #######################################

                ## Load svg file and parse path elements
                root = etree.parse(svg_file).getroot()
                path_elems = root.findall(
                    './/{http://www.w3.org/2000/svg}path')
                if not path_elems:
                    raise ValueError(
                        f"inkscape produced no path for character {char!r} "
                        f"with font family {font_family!r}")
                ## Only the first path is used (sole element in the file);
                ## convert it to mpl polygons via an mpl path
                char_polys = parse_path(path_elems[0].attrib['d']).to_polygons()
                ## Convert to shapely polygons
                char_geos = [shapely.geometry.Polygon(poly)
                             for poly in char_polys]

                ## Build and collapse the containment hierarchy
                ###############################################

                ## Most (all?) character glyphs have well-defined holes and
                ## not-holes. Using containment as an ordering function, one
                ## can build a hierarchy, and then collapse it using
                ## subtraction/difference operations. We can't do it in general
                ## without such a construction, as it is not guaranteed that
                ## poly elements will intersect (eg the inner holes in B, 8,
                ## etc.)
                final_geo = _collapse_glyph(char_geos)

                ## Corrective transformations
                #############################

                ## Translate baseline
                shifted = affinity.translate(final_geo, yoff=Y_BASELINE)
                ## Flip on y axis and shrink to unit size accounting for
                ## baseline
                scaled = affinity.scale(shifted, xfact=SHRINK_FACT,
                                        yfact=-SHRINK_FACT, origin=(0, 0))

                ## Export
                #########

                wkt_dict[char] = scaled.wkt

        ## Return all WKT strings
        return wkt_dict


    ###############################################################################

    if __name__ == "__main__":

        ## Convert the default character set using the chosen font
        glyph_wkts = text2wkt(font_family="Mindfuct NBP")

        ## Serialise to JSON text, then write it to the output file
        serialised = json.dumps(glyph_wkts, ensure_ascii=False, indent=4)
        with open("out.json", "w", encoding="utf-8") as out_file:
            out_file.write(serialised)