USERNAME = 'your user name here' PASSWD = 'your password here' DOC_NAME = 'Document Name' import sys import gdata.docs import gdata.docs.client import gdata.docs.data import gdata.docs.service import gdata.spreadsheet.service import re import datetime import time ############################################################ # CLASS GOOGLESPREADSHEET # ############################################################ class GoogleSpreadsheet(object): '''Wrap up google gdata methods to access a Google spreadsheet''' def __init__(self, email, passwd): # Connect to Google account self.gd_client = gdata.spreadsheet.service.SpreadsheetsService() self.gd_client.email = email self.gd_client.password = passwd self.gd_client.source = 'GoogleSpreadsheet wrapper class' self.gd_client.ProgrammaticLogin() def create_spreadsheet(self, spreadsheet_title): '''Create a new spreadsheet, given its title, return spreadsheet key if successful. ''' client = gdata.docs.client.DocsClient() client.http_client.debug = False client.client_login(self.gd_client.email, self.gd_client.password, 'GoogleSpreadsheet wrapper class') # Create spreadsheet doc = gdata.docs.data.Resource(type='spreadsheet', title=spreadsheet_title) document = client.create_resource(doc) spreadsheet_key = document.GetId().split("%3A")[1] return spreadsheet_key def get_spreadsheet_key(self, spreadsheet_title): '''Get spreadsheet id by spreadsheet title''' doc_query = gdata.spreadsheet.service.DocumentQuery() doc_query['title'] = spreadsheet_title doc_query['title-exact'] = 'true' ss_feed = self.gd_client.GetSpreadsheetsFeed(query=doc_query) spreadsheet_key = ss_feed.entry[0].id.text.rsplit('/',1)[1] return spreadsheet_key def add_worksheet(self, spreadsheet_key, worksheet_title, row_count, col_count): '''Add new worsheet to the spreadsheet identified by its key''' ws = self.gd_client.AddWorksheet(worksheet_title, row_count, col_count, spreadsheet_key) # Get worksheet as SpreadsheatsWorksheet and return its id return ws.id.text.rsplit('/',1)[1] def get_worksheet_id(self, spreadsheet_key, worksheet_name): '''Get worksheet id by spreadsheet key and workbook name''' ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key) d = self.worksheet_dict(spreadsheet_key) try: id = d[worksheet_name] except KeyError, e: print e id = None return id def worksheet_dict(self, spreadsheet_key): '''Create dictionary containing worsheet's ids with keys equal to their names if spreadsheet_key is given. ''' ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key) d = dict() for i, entry in enumerate(ws_feed.entry): d[entry.title.text] = entry.id.text.split('/')[-1] return d def _get_worksheet_url(self, spreadsheet_key, worksheet_id): ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key) url = None for i, entry in enumerate(ws_feed.entry): if entry.id.text.split('/')[-1] == worksheet_id: url = entry.link[-1].href break return url def delete_worksheet(self, spreadsheet_key, worksheet_id): url = self._get_worksheet_url(spreadsheet_key, worksheet_id) self.gd_client.DeleteWorksheet(url=url) def _get_num_cols(self, range): '''Compute the number of rows in a range''' # It's ugly, but I did not find a method in the API that does it pattern = re.compile('[\d]*$') first, second = range.split(':') m1 = re.search(pattern, first) m2 = re.search(pattern, second) ind1 = int(first[m1.start():]) ind2 = int(second[m2.start():]) num_cols = ind2 - ind1 + 1 return num_cols def get_range(self, spreadsheet_key, worksheet_id, rng): '''Return cell range as a list of tuples, so that each element of the tuple represents a row of data in the spreadsheet. The range should be provided following standard R1C1 notation. This method always returns a rectangular array. Empty cells are returned as None. Parameters: spreadhseet_id -- spreadhseet id worksheet_id -- worksheet rng -- range of cells, e.g. 'A2:R23' ''' # If range is given if rng is not None: cell_query = gdata.spreadsheet.service.CellQuery() cell_query['range'] = rng cell_query['return-empty'] = 'true' # oh, my! else: cell_query = None cell_feed = self.gd_client.GetCellsFeed(spreadsheet_key, worksheet_id, query=cell_query) entry = cell_feed.entry num_cols = self._get_num_cols(rng) num_rows = len(entry) / num_cols cells = [] for ii in range(len(entry)): cells.append(entry[ii].content.text) # Now reshape it to create num_rowsXnum_cols lists of tuples cells = zip(*[iter(cells)]*num_rows) return cells def update_cells(self, spreadsheet_key, worksheet_id, data, row=1, col=1): '''Update data in a worksheets specified by keys and sheets id, starting from position specified by row and col (default 1, 1) Parameters: data -- is a list of lists, so that each element represents a row, data must be a rectangular array (all rows are of the same length) Warning: this method is painfully slow, use insert_row whenever possible. ''' num_rows = len(data) num_cols = len(data[0]) for i in range(num_rows): for j in range(num_cols): cell = self.gd_client.UpdateCell(i+row, j+col, str(data[i][j]), spreadsheet_key, worksheet_id) if isinstance(cell, gdata.spreadsheet.SpreadsheetsCell) == False: print 'Error updating cell R{0}C{1}'.format(i+row, j+col) def __insert_rows(self, spreadsheet_key, worksheet_id, data): '''Insert rows in empty spreadsheet''' hdr = [data[0]] self.update_cells(spreadsheet_key, worksheet_id, hdr) # Now we may use InsertRow for i in range(1, len(data)): row_dict = dict() for j in range(len(data[0])): row_dict[str(data[0][j]).lower()] = str(data[i][j]) entry = self.gd_client.InsertRow(row_dict, spreadsheet_key, worksheet_id) if isinstance(entry, gdata.spreadsheet.SpreadsheetsList) == False: print 'Error inserting row #{0}'.format(i) def insert_rows(self, spreadsheet_key, worksheet_id, data): '''Insert a row of data in the spreadsheet. Data should be a list of lists, representing a rectangular array, i.e. each list has the same number of elements. ''' list_feed = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id) if len(list_feed.entry) == 0: self.__insert_rows(spreadsheet_key, worksheet_id, data) else: # Delete non-empty rows for i in range(len(list_feed.entry)): self.gd_client.DeleteRow(list_feed.entry[i]) self.__insert_rows(spreadsheet_key, worksheet_id, data) def get_cols(self, spreadsheet_key, worksheet_id): '''Reads entire workbook and returns dictionary, that contains column names as keys and list of column elements as dictionary values. ''' rows = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id) d = dict() # Build dictionary for row in rows.entry: for key in row.custom: if key in d.keys(): d[key].append(row.custom[key].text) else: d[key] = [row.custom[key].text] return d def get_list_query(self, query_str): '''Retun an instance of ListQuery suitable to pass to the get_cols method. You may set query parameters by setting up query keys. Query keys: sq: for general query such as 'name=john&last=smith' orderby: for example: 'column:first' reverse: values are 'true' and 'false', note these are string. ''' list_query = gdata.spreadsheet.service.ListQuery() return list_query if __name__ == '__main__': # Let's roll gs = GoogleSpreadsheet(USERNAME, PASSWD) sid = gs.get_spreadsheet_key('Test spreadsheet') ws = gs.get_worksheet_id(sid, 'Test Worksheet') print sid print ws data = [['Project', 'Start', 'End', 'NumberOfBugs'], ['Trinity', '2012/05/15', '2012/06/15', 45], ['Trinity', '2012/06/15', '2012/07/15', 29], ['Trinity', '2012/07/15', '2012/08/15', 13], ['Trinity', '2012/08/15', '2012/09/15', 11], ['Trinity', '2012/09/15', '2012/10/15', 15], ['Trinity', '2012/10/15', '2012/11/15', 13], ['Trinity', '2012/11/15', '2012/12/15', 12], ['Trinity', '2012/12/15', '2013/01/15', 4], ['Trinity', '2013/01/15', '2013/02/15', 33], ['Trinity', '2013/02/15', '2013/03/15', 51], ['Trinity', '2013/03/15', '2013/04/15', 19], ['Neo', '2012/05/15', '2012/06/15', 3], ['Neo', '2012/06/15', '2012/07/15', 2], ['Neo', '2012/07/15', '2012/08/15', 3], ['Neo', '2012/08/15', '2012/09/15', 3], ['Neo', '2012/09/15', '2012/10/15', 2], ['Neo', '2012/10/15', '2012/11/15', 1], ['Neo', '2012/11/15', '2012/12/15', 4], ['Neo', '2012/12/15', '2013/01/15', 1], ['Neo', '2013/01/15', '2013/02/15', 0], ['Neo', '2013/02/15', '2013/03/15', 2], ['Neo', '2013/03/15', '2013/04/15', 0], ['OSG', '2012/05/15', '2012/06/15', 16], ['OSG', '2012/06/15', '2012/07/15', 19], ['OSG', '2012/07/15', '2012/08/15', 4], ['OSG', '2012/08/15', '2012/09/15', 8], ['OSG', '2012/09/15', '2012/10/15', 1], ['OSG', '2012/10/15', '2012/11/15', 3], ['OSG', '2012/11/15', '2012/12/15', 15], ['OSG', '2012/12/15', '2013/01/15', 7], ['OSG', '2013/01/15', '2013/02/15', 14], ['OSG', '2013/02/15', '2013/03/15', 8], ['OSG', '2013/03/15', '2013/04/15', 7], ['Morpheus', '2012/05/15', '2012/06/15', 2], ['Morpheus', '2012/06/15', '2012/07/15', 5], ['Morpheus', '2012/07/15', '2012/08/15', 8], ['Morpheus', '2012/08/15', '2012/09/15', 6], ['Morpheus', '2012/09/15', '2012/10/15', 3], ['Morpheus', '2012/10/15', '2012/11/15', 4], ['Morpheus', '2012/11/15', '2012/12/15', 3], ['Morpheus', '2012/12/15', '2013/01/15', 1], ['Morpheus', '2013/01/15', '2013/02/15', 5], ['Morpheus', '2013/02/15', '2013/03/15', 7], ['Morpheus', '2013/03/15', '2013/04/15', 10]] print data gs.insert_rows(sid, ws, data)