Skip to content

Instantly share code, notes, and snippets.

@albertyw
Created July 8, 2013 12:37
Show Gist options
  • Select an option

  • Save albertyw/5948443 to your computer and use it in GitHub Desktop.

Select an option

Save albertyw/5948443 to your computer and use it in GitHub Desktop.
Script to find the total size of an ftp directory by recursively opening directories
"""
This script finds the total size of an ftp directory by recursively opening directories
@author: Albert Wang ([email protected])
September 18, 2010
"""
from ftplib import FTP
import re
import sys
class FtpSize(object):
    """Compute the total size of a directory tree on an FTP server.

    Directories are visited breadth-first.  Each one is listed with ``LIST``;
    the size column of every recognised entry is added to ``self.size`` and
    every sub-directory is appended to ``self.directory_queue``.

    Only Unix-style (``ls -l``) listings are understood; lines that do not
    look like such an entry (for example a leading ``total N`` line) are
    skipped.
    """

    # First column of an ``ls -l`` style entry: one type flag followed by
    # nine permission characters.
    _LIST_ENTRY = re.compile(r'[-dlbcps][-rwxsStT]{9}')

    def __init__(self, server, login, password, directory):
        """
        Connect, log in and set up the traversal state.

        server    -- host name passed to ftplib.FTP
        login     -- user name
        password  -- password
        directory -- directory to measure; None means the server's current
                     directory right after login
        """
        self.ftp = FTP(server)
        try:
            self.ftp.login(login, password)
            self.ftp.set_pasv(True)
            if directory is None:
                directory = self.ftp.pwd()
        except Exception:
            # Do not leave the control connection open if setup fails.
            self.ftp.close()
            raise
        self.human_readable = True
        self.current_directory = directory
        self.size = 0
        self.directory_queue = [self.current_directory]
        # The two attributes below are only used by find_node_name_index(),
        # which is kept for backward compatibility.
        self.node_name_index = 0
        self.first_line_node = ''

    def find_node_name_index(self, line):
        """
        Store in self.node_name_index the length of line minus the length
        of self.first_line_node.

        Retained for backward compatibility; parse_line() no longer uses it.
        """
        self.node_name_index = len(line) - len(self.first_line_node)

    def run(self):
        """
        Walk the whole directory queue and return the accumulated size.

        Prints each directory before it is listed.  Returns a formatted
        string (see convert_bytes) when self.human_readable is true,
        otherwise the raw byte count.  The FTP connection is closed before
        returning, also when an FTP call raises.
        """
        try:
            while self.directory_queue:
                self.current_directory = self.directory_queue.pop(0)
                # Presumably a keep-alive between directory listings.
                self.ftp.sendcmd('NOOP')
                print(self.current_directory)
                self.calculate_size()
        finally:
            self.ftp.close()
        if self.human_readable:
            return self.convert_bytes(self.size)
        return self.size

    def calculate_size(self):
        """
        List self.current_directory and feed every line to parse_line
        """
        self.ftp.cwd(self.current_directory)
        # Ask the server for the resulting path so that children are queued
        # relative to it; a relative start directory would otherwise be
        # re-applied on top of itself by the next cwd().
        self.current_directory = self.ftp.pwd()
        # retrlines invokes the callback once per listing line, so an empty
        # directory simply produces no calls.
        self.ftp.retrlines('LIST', self.parse_line)

    def parse_line(self, line):
        """
        Parse one line returned by the LIST command
        Python's retrlines function already splits each line

        Adds the entry's size to self.size and queues it for traversal if it
        is a directory.  Lines that are not Unix-style entries, and the
        '.' / '..' entries, are ignored.
        """
        # Split off at most eight columns so that a name containing spaces
        # stays intact in the ninth field.
        fields = line.split(None, 8)
        if len(fields) < 9 or not self._LIST_ENTRY.match(fields[0]):
            return
        size_field = fields[4]
        if not size_field.isdigit():
            # e.g. device nodes, whose size column is not a plain number
            return
        node_name = fields[8]
        if node_name in ('.', '..'):
            return
        if fields[0][0] == 'd':
            # Strip a trailing slash so that the root directory does not
            # produce '//name'.
            parent = self.current_directory.rstrip('/')
            self.directory_queue.append(parent + '/' + node_name)
        self.size += int(size_field)

    def convert_bytes(self, bytes):
        """
        Helper function to convert bytes into readable form (1024 => 1K)

        Returns a string with two decimals and a T/G/M/K suffix, or a 'b'
        suffix for values below 1024.
        """
        value = float(bytes)
        units = (
            ('T', 1024.0 ** 4),
            ('G', 1024.0 ** 3),
            ('M', 1024.0 ** 2),
            ('K', 1024.0),
        )
        for suffix, factor in units:
            if value >= factor:
                return '%.2f%s' % (value / factor, suffix)
        return '%.2fb' % value
def print_help():
    """Print the command-line usage string."""
    # Parenthesised single-argument form works on both Python 2 and 3.
    print('python ftpsize.py server login password [directory]')
if __name__ == "__main__":
    # Usage: server login password [directory]
    help_functions = ['help', '-h']
    # Show the usage text and stop when the argument count is wrong or the
    # first argument asks for help.
    if len(sys.argv) not in (4, 5) or sys.argv[1] in help_functions:
        print_help()
        sys.exit(0)
    server, login, password = sys.argv[1:4]
    # The directory is optional; None lets FtpSize fall back to the
    # server's current directory after login.
    directory = sys.argv[4] if len(sys.argv) == 5 else None
    ftp_size = FtpSize(server, login, password, directory)
    print(ftp_size.run())
@fredkoch3
Copy link

I used python 3.6 on windows 10 to try this script. It worked fine after I removed line 68. I have no idea why it is there:
67 node_name = node_info[8]
68 node_name = line[self.node_name_index:len(line)]
Of course, I had to fix the print statements for python 3.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment