Skip to content

Instantly share code, notes, and snippets.

@ConnorBaker
Created March 28, 2024 05:44
Show Gist options
  • Save ConnorBaker/afb55c4b99510d4d012904bb03aa24c3 to your computer and use it in GitHub Desktop.
Save ConnorBaker/afb55c4b99510d4d012904bb03aa24c3 to your computer and use it in GitHub Desktop.
find /nix/store/.links -type f -printf "%s\n" | awk --file=filesize_stats.awk
# Format a byte count as a human-readable string, dividing by 1024 per unit step.
#
# Parameters:
#   size - number of bytes; may be fractional (e.g. a mean or a deviation).
# Locals (declared as extra parameters, the awk convention for local
# variables, so calls do not clobber same-named globals):
#   units, num_units, unit_idx
# Returns:
#   The scaled value with six decimals followed by its unit, e.g.
#   human_readable(2048) yields "2.000000 KB". Values of 1024 PB or more
#   are still reported in PB because PB is the last unit in the list.
function human_readable(size,    units, num_units, unit_idx) {
    # split() returns the number of elements, which is the index of the largest unit.
    num_units = split("B KB MB GB TB PB", units);
    unit_idx = 1; # Start with bytes
    while (size >= 1024 && unit_idx < num_units) {
        size /= 1024;
        unit_idx++;
    }
    return sprintf("%.6f %s", size, units[unit_idx]);
}
# Return the median of an array that is already sorted in ascending order
# and indexed 1..N.
#
# Parameters:
#   sorted_arr - array with consecutive integer indices starting at 1.
# Locals (extra parameters, so nothing leaks into the global namespace):
#   len, mid
# Returns:
#   The middle element for an odd element count, the average of the two
#   middle elements for an even count, and 0 for an empty array.
function compute_median(sorted_arr,    len, mid) {
    len = length(sorted_arr);
    # Guard the empty case explicitly: indexing sorted_arr[0] / sorted_arr[1]
    # here would create those elements in the caller's array as a side effect.
    if (len == 0) {
        return 0;
    }
    if (len % 2 == 1) {
        return sorted_arr[(len + 1) / 2];
    }
    mid = len / 2;
    return (sorted_arr[mid] + sorted_arr[mid + 1]) / 2.0;
}
# Count the leading zero-valued entries of an array sorted in ascending
# order and indexed 1..N.
#
# Parameters:
#   sorted_arr - array with consecutive integer indices starting at 1.
# Locals (extra parameters, so nothing leaks into the global namespace):
#   n, i
# Returns:
#   The number of elements equal to 0 at the front of the array.
function count_number_of_zero_byte_files(sorted_arr,    n, i) {
    n = length(sorted_arr);
    i = 1;
    # The bound on i is essential: an element past the end is uninitialized,
    # which compares equal to 0, so an all-zero (or empty) array would
    # otherwise loop forever while creating new elements.
    while (i <= n && sorted_arr[i] == 0) {
        i++;
    }
    return i - 1;
}
# Sort the collected sizes and print summary statistics.
#
# Reads the globals sizes_bytes (array indexed 1..num_files), num_files and
# total_size_bytes. sizes_bytes is sorted in place by asort().
# Precondition: num_files > 0 (the mean and variance divide by it).
#
# Locals (extra parameters, so the intermediates do not leak as globals):
#   num_zero_byte_files, min_bytes, max_bytes, mean_bytes, median_bytes,
#   variance_bytes, stddev_bytes, sum_sq_dev, delta, i
function print_info(    num_zero_byte_files, min_bytes, max_bytes, mean_bytes, median_bytes, variance_bytes, stddev_bytes, sum_sq_dev, delta, i) {
    asort(sizes_bytes);
    num_zero_byte_files = count_number_of_zero_byte_files(sizes_bytes);
    min_bytes = sizes_bytes[1];
    max_bytes = sizes_bytes[num_files];
    mean_bytes = total_size_bytes / num_files;
    median_bytes = compute_median(sizes_bytes);

    # Population variance as the mean of squared deviations from the mean.
    # The one-pass form (sum of squares minus squared sum over n) subtracts
    # two huge, nearly equal numbers; rounding can make it slightly negative,
    # which turns the square root into NaN. A sum of squares cannot go negative.
    sum_sq_dev = 0;
    for (i = 1; i <= num_files; i++) {
        delta = sizes_bytes[i] - mean_bytes;
        sum_sq_dev += delta * delta;
    }
    variance_bytes = sum_sq_dev / num_files;
    stddev_bytes = sqrt(variance_bytes);

    print "Num files:", num_files;
    print "Num zero-byte files:", num_zero_byte_files;
    print "Total size:", human_readable(total_size_bytes);
    print "Min size:", human_readable(min_bytes);
    print "Max size:", human_readable(max_bytes);
    print "Mean size:", human_readable(mean_bytes);
    print "Median size:", human_readable(median_bytes);
    print "Standard deviation:", human_readable(stddev_bytes);
}
# Start every running aggregate at zero before any input is read.
BEGIN {
    num_files = total_size_bytes = total_size_bytes_sqs = 0;
}

# For each input record, treat the first field as one size value: store it
# at the next index of sizes_bytes and fold it into the running sum and the
# running sum of squares.
{
    sizes_bytes[++num_files] = $1;
    total_size_bytes_sqs += $1 * $1;
    total_size_bytes += $1;
}

# After all input: report statistics, or say so when no records were read.
END {
    if (num_files <= 0) {
        print "No files found.";
    } else {
        print_info();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment