Last active
          November 29, 2021 23:11 
        
      - 
      
 - 
        
Save sk1p/cd83ba1e22b8650d3aefe40f3e9c7e70 to your computer and use it in GitHub Desktop.  
    What if you have filenames with latin1 encoding, but also ones with utf-8 in the same file system tree?
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #!/usr/bin/env python3 | |
| import os | |
| from typing import List, Dict | |
| import subprocess | |
| import click | |
def clean_your_shit(bytes_name: bytes) -> bytes:
    """Return *bytes_name* as UTF-8 encoded bytes.

    A name that already decodes as UTF-8 is returned unchanged. Any other
    name is interpreted as latin1 and re-encoded as UTF-8.
    """
    try:
        # Decoding is only a validity probe; the decoded text is not needed.
        bytes_name.decode("utf-8")
    except UnicodeDecodeError:
        # latin1 assigns a code point to every byte value, so this fallback
        # decode cannot fail on any input.
        return bytes_name.decode("latin1").encode("utf-8")
    return bytes_name
def rename(old_path: bytes, new_path: bytes):
    """Rename *old_path* to *new_path*; both are bytes paths.

    os.rename is used because it accepts bytes paths directly. Compared with
    shelling out to ``mv`` this avoids spawning a process, cannot misread a
    path starting with ``-`` as a command-line option, and never moves the
    source *into* an already existing target directory.

    Cross-filesystem moves are not supported; this is meant for renaming an
    entry within its own directory.

    Raises:
        FileExistsError: if *new_path* already exists. Overwriting it would
            silently destroy data when two differently encoded names
            normalise to the same UTF-8 name.
        OSError: if the underlying rename fails.
    """
    # lexists also reports dangling symlinks, which would be clobbered too.
    if os.path.lexists(new_path):
        raise FileExistsError(
            f"refusing to overwrite existing path: {new_path!r}"
        )
    os.rename(old_path, new_path)
def handle_dirs(
    base_path: bytes, dirnames: List[bytes], really_do_it: bool
) -> Dict[bytes, bytes]:
    """Report, and optionally rename, directories whose names need cleaning.

    Each entry of *dirnames* is passed through ``clean_your_shit``; every
    name that changes is printed. When *really_do_it* is true the directory
    is also renamed inside *base_path*.

    Returns:
        A mapping of old name to new name for the renames that were actually
        performed, so a caller can update its own directory listing. It is
        empty when *really_do_it* is false.
    """
    result: Dict[bytes, bytes] = {}
    for old_name in dirnames:
        new_name = clean_your_shit(old_name)
        if new_name == old_name:
            continue
        # !r formats the raw bytes explicitly; implicit str() of a bytes
        # object triggers BytesWarning when Python runs with -b.
        print(
            f"rename directory: {old_name!r} -> {new_name.decode('utf8')}"
        )
        if really_do_it:
            full_old_path = os.path.join(base_path, old_name)
            full_new_path = os.path.join(base_path, new_name)
            rename(full_old_path, full_new_path)
            # Record only what exists on disk under the new name; reporting
            # a rename that did not happen would point callers at a
            # directory that is not there.
            result[old_name] = new_name
    return result
def handle_files(base_path: bytes, filenames: List[bytes], really_do_it: bool):
    """Report, and optionally rename, files whose names need cleaning.

    Every entry of *filenames* whose cleaned name differs from the original
    is printed; the file is renamed inside *base_path* only when
    *really_do_it* is true.
    """
    for original in filenames:
        cleaned = clean_your_shit(original)
        if cleaned == original:
            # Name is already fine; nothing to report.
            continue
        print(
            f"rename file: {repr(original)} -> {cleaned.decode('utf8')}"
        )
        if not really_do_it:
            # Dry run: reporting is all that happens.
            continue
        rename(
            os.path.join(base_path, original),
            os.path.join(base_path, cleaned),
        )
@click.command()
@click.argument('base_path', type=click.Path())
@click.option('--really-do-it', default=False, is_flag=True)
def main(base_path, really_do_it):
    """Walk BASE_PATH and convert non-UTF-8 file and directory names to UTF-8.

    Without --really-do-it the planned renames are only printed.
    """
    # os.fsencode reverses the filesystem-encoding/surrogateescape decoding
    # applied to command-line arguments, so a base path that is itself not
    # valid UTF-8 round-trips to its original bytes. A plain
    # .encode("utf-8") raises UnicodeEncodeError on such a path.
    walker = os.walk(os.fsencode(base_path), topdown=True)
    for dirpath, dirnames, filenames in walker:
        renames = handle_dirs(dirpath, dirnames, really_do_it)
        handle_files(dirpath, filenames, really_do_it)
        # Because of topdown=True, os.walk descends into whatever names are
        # in `dirnames` after this iteration, so renamed directories are
        # swapped in place. This is skipped on a dry run: nothing was renamed
        # on disk, and the walk must keep following the existing names.
        if really_do_it and renames:
            dirnames[:] = [renames.get(name, name) for name in dirnames]


if __name__ == "__main__":
    main()
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment