Created
July 8, 2013 12:37
-
-
Save albertyw/5948443 to your computer and use it in GitHub Desktop.
Script to find the total size of an ftp directory by recursively opening directories
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| This script finds the total size of an ftp directory by recursively opening directories | |
| @author: Albert Wang ([email protected]) | |
| September 18, 2010 | |
| """ | |
| from ftplib import FTP | |
| import re | |
| import sys | |
class FtpSize():
    """
    Compute the total size of an FTP directory tree.

    The tree is walked breadth-first: each directory is listed with the
    FTP ``LIST`` command, every entry's size is added to ``self.size`` and
    every sub-directory is appended to ``self.directory_queue``.

    NOTE(review): the size reported for directory entries themselves is
    also added to the total, as in the original script -- confirm this is
    the intended accounting.
    """

    # First field of a Unix-style LIST entry: one type character followed by
    # nine permission characters.  Anything else (e.g. a "total 12" header)
    # is not a directory entry.
    _MODE_PATTERN = re.compile(r'[-a-zA-Z][-rwxsStT]{9}')

    def __init__(self, server, login, password, directory):
        """
        Connect, log in and set up the traversal state.

        server    -- host name passed to ftplib.FTP
        login     -- user name
        password  -- password
        directory -- directory to measure; None means the directory the
                     server reports right after login
        """
        self.ftp = FTP(server)
        try:
            self.ftp.login(login, password)
            self.ftp.set_pasv(True)
            if directory is None:
                directory = self.ftp.pwd()
        except Exception:
            # Do not leak the control connection when setup fails.
            self.ftp.close()
            raise
        self.human_readable = True
        self.current_directory = directory
        self.size = 0
        self.directory_queue = [self.current_directory]
        # Kept for backward compatibility with find_node_name_index();
        # parse_line() no longer relies on these two attributes.
        self.node_name_index = 0
        self.first_line_node = ''

    def find_node_name_index(self, line):
        """
        Store the offset in line at which a trailing name of the same
        length as self.first_line_node would start.

        Retained for existing callers only; parse_line() extracts the name
        by splitting the line instead.
        """
        self.node_name_index = len(line) - len(self.first_line_node)

    def run(self):
        """
        Walk every queued directory and return the accumulated size.

        Returns a formatted string (see convert_bytes) when
        self.human_readable is true, otherwise the raw byte count.
        The FTP connection is closed even if the traversal fails.
        """
        try:
            while self.directory_queue:
                self.current_directory = self.directory_queue.pop(0)
                self.ftp.sendcmd('NOOP')
                # Single-argument call form behaves the same on Python 2 and 3.
                print(self.current_directory)
                self.calculate_size()
        finally:
            self.ftp.close()
        if self.human_readable:
            return self.convert_bytes(self.size)
        return self.size

    def calculate_size(self):
        """
        Change into self.current_directory and feed each LIST line to
        parse_line().
        """
        self.ftp.cwd(self.current_directory)
        self.ftp.retrlines('LIST', self.parse_line)

    def parse_line(self, line):
        """
        Parse one line returned by the LIST command.

        Lines that are not Unix-style entries (fewer than nine fields, no
        mode string in the first field, or a non-numeric size field) are
        ignored, as are the '.' and '..' entries.  Directories are queued
        for a later visit; the entry's size is added to self.size.
        """
        # maxsplit=8 keeps a file name containing spaces in one piece.
        node_info = line.split(None, 8)
        if len(node_info) < 9 or not self._MODE_PATTERN.match(node_info[0]):
            return
        node_name = node_info[8]
        if node_name in ('.', '..'):
            return
        try:
            node_size = int(node_info[4])
        except ValueError:
            # Size column is not a plain integer; the line is not in the
            # layout this parser understands.
            return
        if node_info[0][0] == 'd':
            # rstrip avoids producing '//name' when the current directory is '/'.
            parent = self.current_directory.rstrip('/')
            self.directory_queue.append(parent + '/' + node_name)
        self.size += node_size

    def convert_bytes(self, bytes):
        """
        Helper function to convert bytes into readable form (1024 => 1.00K)

        Uses powers of 1024 and two decimal places; values below 1024 get
        the suffix 'b'.
        """
        value = float(bytes)
        units = (
            (1099511627776, 'T'),
            (1073741824, 'G'),
            (1048576, 'M'),
            (1024, 'K'),
        )
        for threshold, suffix in units:
            if value >= threshold:
                return '%.2f%s' % (value / threshold, suffix)
        return '%.2fb' % value
def print_help():
    """Print the command-line usage string to standard output."""
    # Call form with a single argument works on both Python 2 and Python 3;
    # the bare print statement is a syntax error on Python 3.
    print('python ftpsize.py server login password [directory]')
if __name__ == "__main__":
    # Command-line entry point:
    #   python ftpsize.py server login password [directory]
    help_functions = ['help', '-h']
    # A wrong argument count and an explicit help request are handled the
    # same way: show the usage line and exit with status 0.  The count is
    # tested first so sys.argv[1] is only read when it exists.
    if len(sys.argv) not in (4, 5) or sys.argv[1] in help_functions:
        print_help()
        sys.exit(0)
    server, login, password = sys.argv[1:4]
    # The directory argument is optional; None lets FtpSize fall back to
    # the directory the server reports after login.
    directory = sys.argv[4] if len(sys.argv) == 5 else None
    ftp_size = FtpSize(server, login, password, directory)
    # Call form of print works on both Python 2 and Python 3.
    print(ftp_size.run())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I used Python 3.6 on Windows 10 to try this script. It worked fine after I removed line 68. I have no idea why it is there:
67 node_name = node_info[8]
68 node_name = line[self.node_name_index:len(line)]
Of course, I had to fix the print statements for python 3.