Skip to content

Instantly share code, notes, and snippets.

@mcsquaredjr
Created August 6, 2014 15:53
Show Gist options
  • Save mcsquaredjr/5312d3f16eb3b29f6381 to your computer and use it in GitHub Desktop.
Save mcsquaredjr/5312d3f16eb3b29f6381 to your computer and use it in GitHub Desktop.

Revisions

  1. mcsquaredjr created this gist Aug 6, 2014.
    243 changes: 243 additions & 0 deletions google_spreadsheet.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,243 @@

    USERNAME = 'your user name here'
    PASSWD = 'your password here'
    DOC_NAME = 'Document Name'

    import sys

    import gdata.docs
    import gdata.docs.client
    import gdata.docs.data
    import gdata.docs.service
    import gdata.spreadsheet.service
    import re
    import datetime
    import time


    ############################################################
    # CLASS GOOGLESPREADSHEET #
    ############################################################
    class GoogleSpreadsheet(object):
    '''Wrap up google gdata methods to access a Google spreadsheet'''

    def __init__(self, email, passwd):
    # Connect to Google account
    self.gd_client = gdata.spreadsheet.service.SpreadsheetsService()
    self.gd_client.email = email
    self.gd_client.password = passwd
    self.gd_client.source = 'GoogleSpreadsheet wrapper class'
    self.gd_client.ProgrammaticLogin()


    def create_spreadsheet(self, spreadsheet_title):
    '''Create a new spreadsheet, given its title, return spreadsheet
    key if successful.
    '''
    client = gdata.docs.client.DocsClient()
    client.http_client.debug = False
    client.client_login(self.gd_client.email,
    self.gd_client.password,
    'GoogleSpreadsheet wrapper class')
    # Create spreadsheet
    doc = gdata.docs.data.Resource(type='spreadsheet', title=spreadsheet_title)
    document = client.create_resource(doc)
    spreadsheet_key = document.GetId().split("%3A")[1]
    return spreadsheet_key


    def get_spreadsheet_key(self, spreadsheet_title):
    '''Get spreadsheet id by spreadsheet title'''
    doc_query = gdata.spreadsheet.service.DocumentQuery()
    doc_query['title'] = spreadsheet_title
    doc_query['title-exact'] = 'true'
    ss_feed = self.gd_client.GetSpreadsheetsFeed(query=doc_query)
    spreadsheet_key = ss_feed.entry[0].id.text.rsplit('/',1)[1]
    return spreadsheet_key


    def add_worksheet(self, spreadsheet_key, worksheet_title, row_count, col_count):
    '''Add new worsheet to the spreadsheet identified by its key'''
    ws = self.gd_client.AddWorksheet(worksheet_title,
    row_count, col_count,
    spreadsheet_key)
    # Get worksheet as SpreadsheatsWorksheet and return its id
    return ws.id.text.rsplit('/',1)[1]


    def get_worksheet_id(self, spreadsheet_key, worksheet_name):
    '''Get worksheet id by spreadsheet key and workbook name'''
    ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key)
    d = self.worksheet_dict(spreadsheet_key)
    try:
    id = d[worksheet_name]
    except KeyError, e:
    print e
    id = None
    return id


    def worksheet_dict(self, spreadsheet_key):
    '''Create dictionary containing worsheet's ids with keys equal to
    their names if spreadsheet_key is given.
    '''
    ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key)
    d = dict()
    for i, entry in enumerate(ws_feed.entry):
    d[entry.title.text] = entry.id.text.split('/')[-1]
    return d


    def _get_worksheet_url(self, spreadsheet_key, worksheet_id):
    ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key)
    url = None
    for i, entry in enumerate(ws_feed.entry):
    if entry.id.text.split('/')[-1] == worksheet_id:
    url = entry.link[-1].href
    break
    return url


    def delete_worksheet(self, spreadsheet_key, worksheet_id):
    url = self._get_worksheet_url(spreadsheet_key, worksheet_id)
    self.gd_client.DeleteWorksheet(url=url)


    def _get_num_cols(self, range):
    '''Compute the number of rows in a range'''
    # It's ugly, but I did not find a method in the API that does it
    pattern = re.compile('[\d]*$')
    first, second = range.split(':')
    m1 = re.search(pattern, first)
    m2 = re.search(pattern, second)
    ind1 = int(first[m1.start():])
    ind2 = int(second[m2.start():])
    num_cols = ind2 - ind1 + 1
    return num_cols


    def get_range(self, spreadsheet_key, worksheet_id, rng):
    '''Return cell range as a list of tuples, so that each element of
    the tuple represents a row of data in the spreadsheet. The range
    should be provided following standard R1C1 notation. This method
    always returns a rectangular array. Empty cells are returned as
    None.
    Parameters:
    spreadhseet_id -- spreadhseet id
    worksheet_id -- worksheet
    rng -- range of cells, e.g. 'A2:R23'
    '''
    # If range is given
    if rng is not None:
    cell_query = gdata.spreadsheet.service.CellQuery()
    cell_query['range'] = rng
    cell_query['return-empty'] = 'true' # oh, my!
    else:
    cell_query = None

    cell_feed = self.gd_client.GetCellsFeed(spreadsheet_key,
    worksheet_id,
    query=cell_query)
    entry = cell_feed.entry
    num_cols = self._get_num_cols(rng)
    num_rows = len(entry) / num_cols
    cells = []
    for ii in range(len(entry)):
    cells.append(entry[ii].content.text)
    # Now reshape it to create num_rowsXnum_cols lists of tuples
    cells = zip(*[iter(cells)]*num_rows)
    return cells


    def update_cells(self, spreadsheet_key, worksheet_id, data, row=1, col=1):
    '''Update data in a worksheets specified by keys and sheets id,
    starting from position specified by row and col (default 1, 1)
    Parameters:
    data -- is a list of lists, so that each element represents a row,
    data must be a rectangular array (all rows are of the same length)
    Warning: this method is painfully slow, use insert_row whenever possible.
    '''
    num_rows = len(data)
    num_cols = len(data[0])
    for i in range(num_rows):
    for j in range(num_cols):
    cell = self.gd_client.UpdateCell(i+row,
    j+col,
    str(data[i][j]),
    spreadsheet_key,
    worksheet_id)

    if isinstance(cell, gdata.spreadsheet.SpreadsheetsCell) == False:
    print 'Error updating cell R{0}C{1}'.format(i+row, j+col)

    def __insert_rows(self, spreadsheet_key, worksheet_id, data):
    '''Insert rows in empty spreadsheet'''
    hdr = [data[0]]
    self.update_cells(spreadsheet_key, worksheet_id, hdr)
    # Now we may use InsertRow
    for i in range(1, len(data)):
    row_dict = dict()
    for j in range(len(data[0])):
    row_dict[str(data[0][j]).lower()] = str(data[i][j])
    entry = self.gd_client.InsertRow(row_dict, spreadsheet_key, worksheet_id)
    if isinstance(entry, gdata.spreadsheet.SpreadsheetsList) == False:
    print 'Error inserting row #{0}'.format(i)


    def insert_rows(self, spreadsheet_key, worksheet_id, data):
    '''Insert a row of data in the spreadsheet. Data should be a list of
    lists, representing a rectangular array, i.e. each list has the same
    number of elements.
    '''
    list_feed = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id)
    if len(list_feed.entry) == 0:
    self.__insert_rows(spreadsheet_key, worksheet_id, data)
    else:
    # Delete non-empty rows
    for i in range(len(list_feed.entry)):
    self.gd_client.DeleteRow(list_feed.entry[i])
    self.__insert_rows(spreadsheet_key, worksheet_id, data)


    def get_cols(self, spreadsheet_key, worksheet_id):
    '''Reads entire workbook and returns dictionary, that contains
    column names as keys and list of column elements as dictionary
    values.
    '''
    rows = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id)
    d = dict()
    # Build dictionary
    for row in rows.entry:
    for key in row.custom:
    if key in d.keys():
    d[key].append(row.custom[key].text)
    else:
    d[key] = [row.custom[key].text]
    return d


    def get_list_query(self, query_str):
    '''Retun an instance of ListQuery suitable to pass to the get_cols
    method. You may set query parameters by setting up query keys.
    Query keys:
    sq: for general query such as 'name=john&last=smith'
    orderby: for example: 'column:first'
    reverse: values are 'true' and 'false', note these are string.
    '''
    list_query = gdata.spreadsheet.service.ListQuery()
    return list_query



    if __name__ == '__main__':
    # Let's roll
    gs = GoogleSpreadsheet(USERNAME, PASSWD)
    sid = gs.get_spreadsheet_key('Test spreadsheet')
    ws = gs.get_worksheet_id(sid, 'Test Worksheet')
    print sid
    print ws
    data = [['Project', 'Start', 'End', 'NumberOfBugs'], ['Trinity', '2012/05/15', '2012/06/15', 45], ['Trinity', '2012/06/15', '2012/07/15', 29], ['Trinity', '2012/07/15', '2012/08/15', 13], ['Trinity', '2012/08/15', '2012/09/15', 11], ['Trinity', '2012/09/15', '2012/10/15', 15], ['Trinity', '2012/10/15', '2012/11/15', 13], ['Trinity', '2012/11/15', '2012/12/15', 12], ['Trinity', '2012/12/15', '2013/01/15', 4], ['Trinity', '2013/01/15', '2013/02/15', 33], ['Trinity', '2013/02/15', '2013/03/15', 51], ['Trinity', '2013/03/15', '2013/04/15', 19], ['Neo', '2012/05/15', '2012/06/15', 3], ['Neo', '2012/06/15', '2012/07/15', 2], ['Neo', '2012/07/15', '2012/08/15', 3], ['Neo', '2012/08/15', '2012/09/15', 3], ['Neo', '2012/09/15', '2012/10/15', 2], ['Neo', '2012/10/15', '2012/11/15', 1], ['Neo', '2012/11/15', '2012/12/15', 4], ['Neo', '2012/12/15', '2013/01/15', 1], ['Neo', '2013/01/15', '2013/02/15', 0], ['Neo', '2013/02/15', '2013/03/15', 2], ['Neo', '2013/03/15', '2013/04/15', 0], ['OSG', '2012/05/15', '2012/06/15', 16], ['OSG', '2012/06/15', '2012/07/15', 19], ['OSG', '2012/07/15', '2012/08/15', 4], ['OSG', '2012/08/15', '2012/09/15', 8], ['OSG', '2012/09/15', '2012/10/15', 1], ['OSG', '2012/10/15', '2012/11/15', 3], ['OSG', '2012/11/15', '2012/12/15', 15], ['OSG', '2012/12/15', '2013/01/15', 7], ['OSG', '2013/01/15', '2013/02/15', 14], ['OSG', '2013/02/15', '2013/03/15', 8], ['OSG', '2013/03/15', '2013/04/15', 7], ['Morpheus', '2012/05/15', '2012/06/15', 2], ['Morpheus', '2012/06/15', '2012/07/15', 5], ['Morpheus', '2012/07/15', '2012/08/15', 8], ['Morpheus', '2012/08/15', '2012/09/15', 6], ['Morpheus', '2012/09/15', '2012/10/15', 3], ['Morpheus', '2012/10/15', '2012/11/15', 4], ['Morpheus', '2012/11/15', '2012/12/15', 3], ['Morpheus', '2012/12/15', '2013/01/15', 1], ['Morpheus', '2013/01/15', '2013/02/15', 5], ['Morpheus', '2013/02/15', '2013/03/15', 7], ['Morpheus', '2013/03/15', '2013/04/15', 10]]
    print data

    gs.insert_rows(sid, ws, data)