Created
August 6, 2014 15:53
-
-
Save mcsquaredjr/5312d3f16eb3b29f6381 to your computer and use it in GitHub Desktop.
Revisions
-
mcsquaredjr created this gist
Aug 6, 2014 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,243 @@ USERNAME = 'your user name here' PASSWD = 'your password here' DOC_NAME = 'Document Name' import sys import gdata.docs import gdata.docs.client import gdata.docs.data import gdata.docs.service import gdata.spreadsheet.service import re import datetime import time ############################################################ # CLASS GOOGLESPREADSHEET # ############################################################ class GoogleSpreadsheet(object): '''Wrap up google gdata methods to access a Google spreadsheet''' def __init__(self, email, passwd): # Connect to Google account self.gd_client = gdata.spreadsheet.service.SpreadsheetsService() self.gd_client.email = email self.gd_client.password = passwd self.gd_client.source = 'GoogleSpreadsheet wrapper class' self.gd_client.ProgrammaticLogin() def create_spreadsheet(self, spreadsheet_title): '''Create a new spreadsheet, given its title, return spreadsheet key if successful. ''' client = gdata.docs.client.DocsClient() client.http_client.debug = False client.client_login(self.gd_client.email, self.gd_client.password, 'GoogleSpreadsheet wrapper class') # Create spreadsheet doc = gdata.docs.data.Resource(type='spreadsheet', title=spreadsheet_title) document = client.create_resource(doc) spreadsheet_key = document.GetId().split("%3A")[1] return spreadsheet_key def get_spreadsheet_key(self, spreadsheet_title): '''Get spreadsheet id by spreadsheet title''' doc_query = gdata.spreadsheet.service.DocumentQuery() doc_query['title'] = spreadsheet_title doc_query['title-exact'] = 'true' ss_feed = self.gd_client.GetSpreadsheetsFeed(query=doc_query) spreadsheet_key = ss_feed.entry[0].id.text.rsplit('/',1)[1] return spreadsheet_key def add_worksheet(self, spreadsheet_key, worksheet_title, row_count, col_count): '''Add new worsheet to the spreadsheet identified by its key''' ws = self.gd_client.AddWorksheet(worksheet_title, row_count, col_count, spreadsheet_key) # Get worksheet as SpreadsheatsWorksheet and return its id return ws.id.text.rsplit('/',1)[1] def get_worksheet_id(self, spreadsheet_key, worksheet_name): '''Get worksheet id by spreadsheet key and workbook name''' ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key) d = self.worksheet_dict(spreadsheet_key) try: id = d[worksheet_name] except KeyError, e: print e id = None return id def worksheet_dict(self, spreadsheet_key): '''Create dictionary containing worsheet's ids with keys equal to their names if spreadsheet_key is given. ''' ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key) d = dict() for i, entry in enumerate(ws_feed.entry): d[entry.title.text] = entry.id.text.split('/')[-1] return d def _get_worksheet_url(self, spreadsheet_key, worksheet_id): ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key) url = None for i, entry in enumerate(ws_feed.entry): if entry.id.text.split('/')[-1] == worksheet_id: url = entry.link[-1].href break return url def delete_worksheet(self, spreadsheet_key, worksheet_id): url = self._get_worksheet_url(spreadsheet_key, worksheet_id) self.gd_client.DeleteWorksheet(url=url) def _get_num_cols(self, range): '''Compute the number of rows in a range''' # It's ugly, but I did not find a method in the API that does it pattern = re.compile('[\d]*$') first, second = range.split(':') m1 = re.search(pattern, first) m2 = re.search(pattern, second) ind1 = int(first[m1.start():]) ind2 = int(second[m2.start():]) num_cols = ind2 - ind1 + 1 return num_cols def get_range(self, spreadsheet_key, worksheet_id, rng): '''Return cell range as a list of tuples, so that each element of the tuple represents a row of data in the spreadsheet. The range should be provided following standard R1C1 notation. This method always returns a rectangular array. Empty cells are returned as None. Parameters: spreadhseet_id -- spreadhseet id worksheet_id -- worksheet rng -- range of cells, e.g. 'A2:R23' ''' # If range is given if rng is not None: cell_query = gdata.spreadsheet.service.CellQuery() cell_query['range'] = rng cell_query['return-empty'] = 'true' # oh, my! else: cell_query = None cell_feed = self.gd_client.GetCellsFeed(spreadsheet_key, worksheet_id, query=cell_query) entry = cell_feed.entry num_cols = self._get_num_cols(rng) num_rows = len(entry) / num_cols cells = [] for ii in range(len(entry)): cells.append(entry[ii].content.text) # Now reshape it to create num_rowsXnum_cols lists of tuples cells = zip(*[iter(cells)]*num_rows) return cells def update_cells(self, spreadsheet_key, worksheet_id, data, row=1, col=1): '''Update data in a worksheets specified by keys and sheets id, starting from position specified by row and col (default 1, 1) Parameters: data -- is a list of lists, so that each element represents a row, data must be a rectangular array (all rows are of the same length) Warning: this method is painfully slow, use insert_row whenever possible. ''' num_rows = len(data) num_cols = len(data[0]) for i in range(num_rows): for j in range(num_cols): cell = self.gd_client.UpdateCell(i+row, j+col, str(data[i][j]), spreadsheet_key, worksheet_id) if isinstance(cell, gdata.spreadsheet.SpreadsheetsCell) == False: print 'Error updating cell R{0}C{1}'.format(i+row, j+col) def __insert_rows(self, spreadsheet_key, worksheet_id, data): '''Insert rows in empty spreadsheet''' hdr = [data[0]] self.update_cells(spreadsheet_key, worksheet_id, hdr) # Now we may use InsertRow for i in range(1, len(data)): row_dict = dict() for j in range(len(data[0])): row_dict[str(data[0][j]).lower()] = str(data[i][j]) entry = self.gd_client.InsertRow(row_dict, spreadsheet_key, worksheet_id) if isinstance(entry, gdata.spreadsheet.SpreadsheetsList) == False: print 'Error inserting row #{0}'.format(i) def insert_rows(self, spreadsheet_key, worksheet_id, data): '''Insert a row of data in the spreadsheet. Data should be a list of lists, representing a rectangular array, i.e. each list has the same number of elements. ''' list_feed = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id) if len(list_feed.entry) == 0: self.__insert_rows(spreadsheet_key, worksheet_id, data) else: # Delete non-empty rows for i in range(len(list_feed.entry)): self.gd_client.DeleteRow(list_feed.entry[i]) self.__insert_rows(spreadsheet_key, worksheet_id, data) def get_cols(self, spreadsheet_key, worksheet_id): '''Reads entire workbook and returns dictionary, that contains column names as keys and list of column elements as dictionary values. ''' rows = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id) d = dict() # Build dictionary for row in rows.entry: for key in row.custom: if key in d.keys(): d[key].append(row.custom[key].text) else: d[key] = [row.custom[key].text] return d def get_list_query(self, query_str): '''Retun an instance of ListQuery suitable to pass to the get_cols method. You may set query parameters by setting up query keys. Query keys: sq: for general query such as 'name=john&last=smith' orderby: for example: 'column:first' reverse: values are 'true' and 'false', note these are string. ''' list_query = gdata.spreadsheet.service.ListQuery() return list_query if __name__ == '__main__': # Let's roll gs = GoogleSpreadsheet(USERNAME, PASSWD) sid = gs.get_spreadsheet_key('Test spreadsheet') ws = gs.get_worksheet_id(sid, 'Test Worksheet') print sid print ws data = [['Project', 'Start', 'End', 'NumberOfBugs'], ['Trinity', '2012/05/15', '2012/06/15', 45], ['Trinity', '2012/06/15', '2012/07/15', 29], ['Trinity', '2012/07/15', '2012/08/15', 13], ['Trinity', '2012/08/15', '2012/09/15', 11], ['Trinity', '2012/09/15', '2012/10/15', 15], ['Trinity', '2012/10/15', '2012/11/15', 13], ['Trinity', '2012/11/15', '2012/12/15', 12], ['Trinity', '2012/12/15', '2013/01/15', 4], ['Trinity', '2013/01/15', '2013/02/15', 33], ['Trinity', '2013/02/15', '2013/03/15', 51], ['Trinity', '2013/03/15', '2013/04/15', 19], ['Neo', '2012/05/15', '2012/06/15', 3], ['Neo', '2012/06/15', '2012/07/15', 2], ['Neo', '2012/07/15', '2012/08/15', 3], ['Neo', '2012/08/15', '2012/09/15', 3], ['Neo', '2012/09/15', '2012/10/15', 2], ['Neo', '2012/10/15', '2012/11/15', 1], ['Neo', '2012/11/15', '2012/12/15', 4], ['Neo', '2012/12/15', '2013/01/15', 1], ['Neo', '2013/01/15', '2013/02/15', 0], ['Neo', '2013/02/15', '2013/03/15', 2], ['Neo', '2013/03/15', '2013/04/15', 0], ['OSG', '2012/05/15', '2012/06/15', 16], ['OSG', '2012/06/15', '2012/07/15', 19], ['OSG', '2012/07/15', '2012/08/15', 4], ['OSG', '2012/08/15', '2012/09/15', 8], ['OSG', '2012/09/15', '2012/10/15', 1], ['OSG', '2012/10/15', '2012/11/15', 3], ['OSG', '2012/11/15', '2012/12/15', 15], ['OSG', '2012/12/15', '2013/01/15', 7], ['OSG', '2013/01/15', '2013/02/15', 14], ['OSG', '2013/02/15', '2013/03/15', 8], ['OSG', '2013/03/15', '2013/04/15', 7], ['Morpheus', '2012/05/15', '2012/06/15', 2], ['Morpheus', '2012/06/15', '2012/07/15', 5], ['Morpheus', '2012/07/15', '2012/08/15', 8], ['Morpheus', '2012/08/15', '2012/09/15', 6], ['Morpheus', '2012/09/15', '2012/10/15', 3], ['Morpheus', '2012/10/15', '2012/11/15', 4], ['Morpheus', '2012/11/15', '2012/12/15', 3], ['Morpheus', '2012/12/15', '2013/01/15', 1], ['Morpheus', '2013/01/15', '2013/02/15', 5], ['Morpheus', '2013/02/15', '2013/03/15', 7], ['Morpheus', '2013/03/15', '2013/04/15', 10]] print data gs.insert_rows(sid, ws, data)