username = 'your email' passwd = 'your pass' doc_name = 'PV data' import sys sys.path.append('../gdata') import gdata.docs import gdata.docs.service import gdata.spreadsheet.service import re, os class GoogleSpreadsheet(object): '''Wrap up google gdata methods to access a Google spreadsheet''' def __init__(self, email, passwd): # Connect to Google account self.gd_client = gdata.spreadsheet.service.SpreadsheetsService() self.gd_client.email = username self.gd_client.password = passwd self.gd_client.source = 'GoogleSpreadsheet wrapper class' self.gd_client.ProgrammaticLogin() def get_spreadsheet_id(self, spreadsheet_title): '''Get spreadsheet id by spreadsheet title''' doc_query = gdata.spreadsheet.service.DocumentQuery() doc_query['title'] = spreadsheet_title doc_query['title-exact'] = 'true' ss_feed = self.gd_client.GetSpreadsheetsFeed(query=doc_query) spreadsheet_id = ss_feed.entry[0].id.text.rsplit('/',1)[1] return spreadsheet_id def get_worksheet_id(self, spreadsheet_id, workbook_name): '''Get worksheet id by spreadsheet id and workbook name''' ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_id) d = self.worksheet_dict(spreadsheet_id) try: id = d[workbook_name] except KeyError, e: print e id = None return id def worksheet_dict(self, spreadsheet_id): '''Create dictionary containing worsheet's ids with keys equal to their names if spreadsheet_id is given. ''' ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_id) d = dict() for i, entry in enumerate(ws_feed.entry): d[entry.title.text] = entry.id.text.split('/')[-1] return d def _get_num_cols(self, range): '''Compute the number of rows in a range''' # It's ugly, but I did not find a method in the API that does it pattern = re.compile('[\d]*$') first, second = range.split(':') m1 = re.search(pattern, first) m2 = re.search(pattern, second) ind1 = int(first[m1.start():]) ind2 = int(second[m2.start():]) num_cols = ind2 - ind1 + 1 return num_cols def get_range(self, spreadsheet_id, worksheet_id, rng): '''Return cell range as a list of tuples, so that each element of the tuple represents a row of data in the spreadsheet. The range should be provided following standard R1C1 notation. This method always returns a rectangular array. Empty cells are returned as None. Parameters: spreadhseet_id -- spreadhseet id worksheet_id -- worksheet rng -- range of cells, e.g. 'A2:R23' ''' # If range is given if rng is not None: cell_query = gdata.spreadsheet.service.CellQuery() cell_query['range'] = rng cell_query['return-empty'] = 'true' # oh, my! else: cell_query = None cell_feed = self.gd_client.GetCellsFeed(spreadsheet_id, worksheet_id, query=cell_query) entry = cell_feed.entry num_cols = self._get_num_cols(rng) num_rows = len(entry) / num_cols cells = [] for ii in range(len(entry)): cells.append(entry[ii].content.text) # Now reshape it to create num_rowsXnum_cols lists of tuples cells = zip(*[iter(cells)]*num_rows) return cells def get_cols(self, spreadsheet_id, worksheet_id, query=None): '''Reads entire workbook and returns dictionary, that contains column names as keys and list of column elements as dictionary values. ''' if query is None: rows = self.gd_client.GetListFeed(spreadsheet_id, worksheet_id) else: rows = self.gd_client.GetListFeed(spreadsheet_id, worksheet_id, query) d = dict() # Build dictionary for row in rows.entry: for key in row.custom: if key in d.keys(): d[key].append(row.custom[key].text) else: d[key] = [row.custom[key].text] return d def get_list_query(self, query_str): '''Retun an instance of ListQuery suitable to pass to the get_cols method. You may set query parameters by setting up query keys. Query keys: sq: for general query such as 'name=john&last=smith' orderby: for example: 'column:first' reverse: values are 'true' and 'false', note these are string. ''' list_query = gdata.spreadsheet.service.ListQuery() return list_query if __name__ == '__main__': # Let's roll gs = GoogleSpreadsheet(username, passwd) sid = gs.get_spreadsheet_id(doc_name) print sid wid = gs.get_worksheet_id(sid, 'Sheet1') print wid cls = gs.get_range(sid, wid, 'A1:C5') print cls d = gs.get_cols(sid, wid) print d