--show the top 10 pairs of folders that have the most files in common. -- Create a CTE named folders that selects the group ID, folder and filename from dupes table -- and adds a column named full_path that combines the folder and filename with a backslash with folders as ( select [Group ID] as group_id, Folder || '\' || filename as full_path from dupes ), -- Create a CTE named split that recursively splits the full_path column into single characters -- and appends them to a column named path_prefix, while keeping track of the last two characters -- in columns last_char and common_denominator split AS ( select * from q UNION ALL SELECT id,s || substr(rest, 1, 1) as ss, substr(rest, 1, 1) as last2, substr(rest, 2) as last3 FROM split WHERE rest <> '' ) ,qq as (SELECT id,s,last2,rest FROM split WHERE (rest = '' OR last2 = '\') and s != last2 and last2 = '\') ,qzy as (select a.s as LeftX,b.s as RightX,b.id as bs from qq as a cross join qq as b where a.rest = b.rest and a.s != b.s group by bs order by length(a.rest)) select leftX,rightX,count(*) c from qzy group by leftx,rightx order by c desc limit 10