Skip to content

Instantly share code, notes, and snippets.

@sk1p
Last active November 29, 2021 23:11
Show Gist options
  • Save sk1p/cd83ba1e22b8650d3aefe40f3e9c7e70 to your computer and use it in GitHub Desktop.
Save sk1p/cd83ba1e22b8650d3aefe40f3e9c7e70 to your computer and use it in GitHub Desktop.
What if you have filenames with latin1 encoding, but also ones with utf-8 in the same file system tree?
#!/usr/bin/env python3
import os
from typing import List, Dict
import subprocess
import click
def clean_your_shit(bytes_name: bytes) -> bytes:
    """Return *bytes_name* as UTF-8 encoded bytes.

    A name that already decodes as UTF-8 is returned unchanged. Any other
    name is interpreted as latin1 and re-encoded as UTF-8.

    Args:
        bytes_name: A raw file or directory name.

    Returns:
        The same name, guaranteed to be valid UTF-8.
    """
    try:
        # Decoding is only a validity probe; the decoded text is discarded.
        bytes_name.decode("utf-8")
    except UnicodeDecodeError:
        # latin1 assigns a code point to every byte value, so this
        # fallback decode cannot fail for any bytes input.
        return bytes_name.decode("latin1").encode("utf-8")
    return bytes_name
def rename(old_path: bytes, new_path: bytes):
    """Rename *old_path* to *new_path* without clobbering an existing entry.

    ``os.rename`` accepts ``bytes`` paths directly, so no external ``mv``
    process is needed. That also means a path starting with ``-`` cannot be
    mistaken for a command-line option.

    Args:
        old_path: Existing path, as raw bytes.
        new_path: Desired path, as raw bytes.

    Raises:
        FileExistsError: If something already exists at *new_path*.
        OSError: If the underlying rename fails.
    """
    # Two differently-encoded spellings of the same name can coexist in one
    # directory; renaming one onto the other would silently destroy data.
    if os.path.lexists(new_path):
        raise FileExistsError(
            f"refusing to overwrite existing path: {new_path!r}"
        )
    os.rename(old_path, new_path)
def handle_dirs(
    base_path: bytes, dirnames: List[bytes], really_do_it: bool
) -> Dict[bytes, bytes]:
    """Report, and optionally perform, directory renames under *base_path*.

    Each name in *dirnames* is passed through ``clean_your_shit``; names
    that come back changed are printed, and renamed on disk only when
    *really_do_it* is true.

    Args:
        base_path: Directory that contains the entries in *dirnames*.
        dirnames: Raw directory names directly below *base_path*.
        really_do_it: When false, only print what would be renamed.

    Returns:
        Mapping of old name to new name for every rename actually
        performed. It is empty in a dry run, so a caller walking the tree
        keeps using the names that still exist on disk.
    """
    result: Dict[bytes, bytes] = {}
    for old_name in dirnames:
        new_name = clean_your_shit(old_name)
        if new_name == old_name:
            continue
        # `!r` makes the bytes formatting explicit (no BytesWarning under
        # `python -b`) while printing the same text as before.
        print(
            f"rename directory: {old_name!r} -> {new_name.decode('utf8')}"
        )
        if really_do_it:
            full_old_path = os.path.join(base_path, old_name)
            full_new_path = os.path.join(base_path, new_name)
            rename(full_old_path, full_new_path)
            result[old_name] = new_name
    return result
def handle_files(base_path: bytes, filenames: List[bytes], really_do_it: bool):
    """Report, and optionally perform, file renames under *base_path*.

    Every name in *filenames* is passed through ``clean_your_shit``. A name
    that comes back changed is printed, and handed to ``rename`` only when
    *really_do_it* is true.

    Args:
        base_path: Directory that contains the entries in *filenames*.
        filenames: Raw file names directly below *base_path*.
        really_do_it: When false, only print what would be renamed.
    """
    for original in filenames:
        cleaned = clean_your_shit(original)
        if cleaned == original:
            # Nothing to report or rename for this entry.
            continue
        print(
            f"rename file: {repr(original)} -> {cleaned.decode('utf8')}"
        )
        if not really_do_it:
            continue
        rename(
            os.path.join(base_path, original),
            os.path.join(base_path, cleaned),
        )
@click.command()
@click.argument('base_path', type=click.Path())
@click.option('--really-do-it', default=False, is_flag=True)
def main(base_path, really_do_it):
    """Re-encode latin1 file and directory names below BASE_PATH as UTF-8.

    Without --really-do-it the planned renames are only printed.
    """
    # os.fsencode round-trips command-line paths that are not valid UTF-8
    # (surrogate-escaped str); a plain .encode("utf-8") raises on those.
    walker = os.walk(os.fsencode(base_path), topdown=True)
    for dirpath, dirnames, filenames in walker:
        renames = handle_dirs(dirpath, dirnames, really_do_it)
        handle_files(dirpath, filenames, really_do_it)
        # Because of topdown=True, `os.walk` descends into whatever names
        # are in `dirnames` after this iteration, so the list is updated in
        # place with the new directory names.
        dirnames[:] = [renames.get(name, name) for name in dirnames]
# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment