Examples (using a 1M or 10M frequency list from Project Gutenberg): $ python segmentation.py penisland ... (['p', 'en', 'island'], -15.638518785248923) (['pe', 'nis', 'land'], -15.108322797049299) (['pen', 'island'], -8.375691641879506) (['peni', 'sl', 'and'], -14.687786244444453) (['penis', 'land'], -9.672682030503244) (['penisl', 'and'], -15.235864890453465) (['penisla', 'nd'], -20.26133790488834) (['penislan', 'd'], -24.41870399665061) (['penisland'], -16.709351998325303) ['pen', 'island'] $ python segmentation.py therapistfinder ... (['t', 'he', 'rapist', 'finder'], -24.748890963614368) (['th', 'era', 'pist', 'finder'], -21.212173151805697) (['the', 'rapist', 'finder'], -15.237632013697695) (['ther', 'a', 'pist', 'finder'], -19.589487264674233) (['thera', 'pist', 'finder'], -19.16492932696031) (['therap', 'ist', 'finder'], -17.992610282256244) (['therapi', 'st', 'finder'], -24.47141671545925) (['therapis', 'tf', 'in', 'der'], -21.371078882293887) (['therapist', 'finder'], -13.623547247250375) (['therapistf', 'in', 'der'], -22.861482061222056) (['therapistfi', 'nder'], -25.98413509261641) (['therapistfin', 'der'], -23.10614521275257) (['therapistfind', 'er'], -24.395855144970255) (['therapistfinde', 'r'], -30.418703996650606) (['therapistfinder'], -22.709351998325303) ['therapist', 'finder'] $ python segmentation.py whorepresents ... 
(['w', 'ho', 'represents'], -18.14739742951673) (['wh', 'ore', 'presents'], -15.423500721603162) (['who', 'represents'], -7.398759138614212) (['whor', 'ep', 'resents'], -20.08562578090216) (['whore', 'presents'], -10.445817549886325) (['whorep', 'resents'], -15.673004770048102) (['whorepr', 'esents'], -23.816644005322647) (['whorepre', 'sents'], -23.098557710539556) (['whorepres', 'ents'], -23.31936871896465) (['whoreprese', 'nts'], -25.459662604329512) (['whorepresen', 'ts'], -24.458042989379624) (['whorepresent', 's'], -28.418703996650606) (['whorepresents'], -20.709351998325303) ['who', 'represents'] It's funny to see it *work* even with Kauderwelsch (a mishmash of languages, here German, English and French): $ python segmentation.py zugarrivesatgaredunord (['z', 'ug', 'arrives', 'at', 'gare', 'du', 'nord'], -32.50373110714555) (['zu', 'g', 'arrives', 'at', 'gare', 'du', 'nord'], -29.771130940523193) (['zug', 'arrives', 'at', 'gare', 'du', 'nord'], -27.6931533335704) (['zuga', 'rrivesatgaredunord'], -37.43176816321052) (['zugar', 'rives', 'at', 'gare', 'du', 'nord'], -35.27313676138066) (['zugarr', 'ive', 'sat', 'gare', 'du', 'nord'], -35.62167660702414) (['zugarri', 'vesat', 'gare', 'du', 'nord'], -37.06730572297619) (['zugarriv', 'es', 'at', 'gare', 'du', 'nord'], -36.19798266766783) (['zugarrive', 'sat', 'gare', 'du', 'nord'], -34.778801974903615) (['zugarrives', 'at', 'gare', 'du', 'nord'], -34.347608226189784) (['zugarrivesa', 'tg', 'are', 'du', 'nord'], -36.61899603139459) (['zugarrivesat', 'gare', 'du', 'nord'], -34.06742498500572) (['zugarrivesatg', 'are', 'du', 'nord'], -31.83761040103289) (['zugarrivesatga', 'red', 'un', 'ord'], -34.25155677971655) (['zugarrivesatgar', 'e', 'du', 'nord'], -34.7329574229286) (['zugarrivesatgare', 'du', 'nord'], -32.353490844484725) (['zugarrivesatgared', 'un', 'ord'], -33.482848511720604) (['zugarrivesatgaredu', 'nord'], -30.97651206109257) (['zugarrivesatgaredun', 'ord'], -32.2095721169088) (['zugarrivesatgareduno', 'rd'], -33.55078317272377) (['zugarrivesatgaredunor', 'd'], 
-32.66984483306061) (['zugarrivesatgaredunord'], -29.71588408160526) ['zug', 'arrives', 'at', 'gare', 'du', 'nord']