Last active
May 26, 2024 18:34
-
-
Save mvdoc/c46e050bda45d3cb5b36ed40c77f2c24 to your computer and use it in GitHub Desktop.
Revisions
-
mvdoc revised this gist
May 26, 2024. 1 changed file with 4 additions and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,7 +7,7 @@ def get_files_with_one_copy(): try: result = subprocess.run(['git-annex', 'find', '--copies=1', '--and', '--not', '--copies=2', '--and', '--in=here'], capture_output=True, text=True, check=True) files = result.stdout.splitlines() return files except subprocess.CalledProcessError as e: @@ -16,6 +16,9 @@ def get_files_with_one_copy(): def get_file_size(file): try: # result = subprocess.run(['git-annex', 'info', file, '--json', '--bytes', '--fast'], capture_output=True, text=True, check=True) # info = json.loads(result.stdout) # return int(info['size']) result = subprocess.run(['du', '-bL', file], capture_output=True, text=True, check=True) return int(result.stdout.split()[0]) except subprocess.CalledProcessError as e: -
mvdoc created this gist
May 26, 2024. There are no files selected for viewing.
# This script computes the total size of git-annex files with only a single local copy.
# It's useful to figure out how much data will be used if all the files were to be archived.

import subprocess
import json
import os

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; fall back to plain iteration
    # when tqdm is not installed.
    def tqdm(iterable, **kwargs):
        return iterable


def get_files_with_one_copy():
    """Return the annexed files whose only copy is the local one.

    Runs ``git-annex find`` in the current working directory. ``--copies=1``
    alone matches files with *at least* one copy, so ``--not --copies=2`` is
    added to restrict the result to files with exactly one copy.

    Returns:
        A list of file paths as printed by git-annex, or an empty list if
        git-annex is missing or exits with an error.
    """
    command = [
        'git-annex', 'find',
        '--copies=1', '--and', '--not', '--copies=2',
        '--and', '--in=here',
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError covers a git-annex binary that is not installed / not on PATH.
        print(f"Error finding files: {e}")
        return []
    return result.stdout.splitlines()


def get_file_size(file):
    """Return the size of *file* in bytes, following symlinks.

    Uses ``os.path.getsize`` instead of spawning ``du -bL`` for every file:
    it reports the same apparent size, works where ``du`` has no ``-b``
    option, and avoids one subprocess per file.

    Returns:
        The size in bytes, or 0 if the file cannot be stat'ed (for example a
        missing file or a dangling symlink); the error is printed.
    """
    try:
        return os.path.getsize(file)
    except OSError as e:
        print(f"Error getting info for {file}: {e}")
        return 0


def _human_readable(num_bytes):
    """Format *num_bytes* with ``numfmt`` (IEC units), or as raw bytes on failure."""
    try:
        result = subprocess.run(
            ['numfmt', '--to=iec-i', '--suffix=B', str(num_bytes)],
            capture_output=True, text=True, check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # numfmt is unavailable or failed: report the exact byte count
        # rather than printing an empty size.
        return f"{num_bytes}B"
    return result.stdout.strip()


def main():
    """Print the number and total size of files that have only one copy."""
    files = get_files_with_one_copy()
    total_size = sum(
        get_file_size(file) for file in tqdm(files, desc="Processing files")
    )
    human_readable_size = _human_readable(total_size)
    n_files = len(files)
    print(f"Total size of {n_files} files with only one copy: {human_readable_size}")


if __name__ == "__main__":
    main()