Last active
June 21, 2023 06:25
-
-
Save perXautomatik/f81d928ccbcaa69f9fa68e9686f0f97d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --show the top 10 pairs of folders that have the most files in common. | |
| -- Create a CTE named folders that selects the group ID, folder and filename from dupes table | |
| -- and adds a column named full_path that combines the folder and filename with a backslash | |
| with folders as ( | |
| select [Group ID] as group_id, Folder || '\' || filename as full_path from dupes | |
| ), | |
| -- Create a CTE named split that recursively splits the full_path column into single characters | |
| -- and appends them to a column named path_prefix, while keeping track of the last two characters | |
| -- in columns last_char and common_denominator | |
| split AS ( | |
| select * from q | |
| UNION ALL | |
| SELECT id,s || substr(rest, 1, 1) as ss, | |
| substr(rest, 1, 1) as last2, | |
| substr(rest, 2) as last3 FROM split | |
| WHERE rest <> '' | |
| ) | |
| ,qq as (SELECT id,s,last2,rest FROM split WHERE (rest = '' OR last2 = '\') and s != last2 and last2 = '\') | |
| ,qzy as (select a.s as LeftX,b.s as RightX,b.id as bs from qq as a cross join qq as b where a.rest = b.rest and a.s != b.s group by bs order by length(a.rest)) | |
| select leftX,rightX,count(*) c from qzy group by leftx,rightx | |
| order by c desc limit 10 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment