Skip to content

Instantly share code, notes, and snippets.

@wgins
Forked from marinamixpanel/people2csv.py
Last active May 31, 2018 19:05
Show Gist options
  • Select an option

  • Save wgins/b21f4f0c2e160f7f95af to your computer and use it in GitHub Desktop.

Select an option

Save wgins/b21f4f0c2e160f7f95af to your computer and use it in GitHub Desktop.

Revisions

  1. wgins revised this gist Sep 5, 2017. 1 changed file with 10 additions and 1 deletion.
    11 changes: 10 additions & 1 deletion people2csv.py
    Original file line number Diff line number Diff line change
    @@ -133,11 +133,20 @@ def export_csv(self, outfilename, fname):
    selector = r''

    behaviors = r''

    '''
    Optionally export just specific properties. Exports will always include $distinct_id and $last_seen
    output_properties = ['property1', 'property2', 'property3']
    '''
    output_properties = []

    if not behaviors:
    parameters = {'selector': selector}
    else:
    time_offset = int(raw_input("Project time offset from GMT (ex. PST = -8): "))
    parameters = {'selector': selector, 'behaviors': behaviors, 'as_of_timestamp': int(time.time()) + (time_offset * 3600)}

    if output_properties:
    parameters['output_properties'] = output_properties

    api.get_and_write_results(parameters)
    api.get_and_write_results(parameters)
  2. wgins revised this gist Aug 2, 2017. 1 changed file with 4 additions and 2 deletions.
    6 changes: 4 additions & 2 deletions people2csv.py
    Original file line number Diff line number Diff line change
    @@ -128,8 +128,10 @@ def export_csv(self, outfilename, fname):
    selector = '(datetime(1458587013 - 86400) > properties["Created"] and behaviors["behavior_79"] > 0'
    behaviors = '[{"window": "90d", "name": "behavior_79", "event_selectors": [{"event": "Edit Colors"}]}]'
    '''
    selector = ''
    # Leave 'r' before the behaviors string so that it's interpreted as a string literal to handle escaped quotes

    # Leave 'r' before the behaviors and selector strings so that they are interpreted as a string literals to handle escaped quotes
    selector = r''

    behaviors = r''

    if not behaviors:
  3. Will Ginsberg revised this gist Apr 11, 2016. 1 changed file with 70 additions and 103 deletions.
    173 changes: 70 additions & 103 deletions people2csv.py
    Original file line number Diff line number Diff line change
    @@ -1,70 +1,31 @@
    ''' people export'''

    import hashlib
    import time
    import urllib #for url encoding
    import urllib2 #for sending requests
    import base64
    import sys
    import csv
    import sys
    import time
    import urllib # for url encoding
    import urllib2 # for sending requests

    try:
    import json
    import json
    except ImportError:
    import simplejson as json

    import simplejson as json

    class Mixpanel(object):

    def __init__(self, api_key, api_secret, token):
    self.api_key = api_key
    def __init__(self, api_secret):
    self.api_secret = api_secret
    self.token = token

    def request(self, params, format = 'json'):
    def request(self, params, format='json'):
    '''let's craft the http request'''
    params['api_key']=self.api_key
    params['expire'] = int(time.time())+600 # 600 is ten minutes from now
    if 'sig' in params: del params['sig']
    params['sig'] = self.hash_args(params)

    request_url = 'https://mixpanel.com/api/2.0/engage/?' + self.unicode_urlencode(params)

    request = urllib.urlopen(request_url)
    data = request.read()

    #print request_url

    return data

    def hash_args(self, args, secret=None):
    '''Hash dem arguments in the proper way
    join keys - values and append a secret -> md5 it'''

    for a in args:
    if isinstance(args[a], list): args[a] = json.dumps(args[a])

    args_joined = ''
    for a in sorted(args.keys()):
    if isinstance(a, unicode):
    args_joined += a.encode('utf-8')
    else:
    args_joined += str(a)

    args_joined += "="

    if isinstance(args[a], unicode):
    args_joined += args[a].encode('utf-8')
    else:
    args_joined += str(args[a])

    hash = hashlib.md5(args_joined)

    if secret:
    hash.update(secret)
    elif self.api_secret:
    hash.update(self.api_secret)
    return hash.hexdigest()
    data = None
    request_url = 'https://mixpanel.com/api/2.0/engage/?'
    request_url = request_url + self.unicode_urlencode(params)
    headers = {'Authorization': 'Basic {encoded_secret}'.format(encoded_secret=base64.b64encode(self.api_secret))}
    request = urllib2.Request(request_url, data, headers)
    response = urllib2.urlopen(request, timeout=120)
    return response.read()

    def unicode_urlencode(self, params):
    ''' Convert stuff to json format and correctly handle unicode url parameters'''
    @@ -78,16 +39,47 @@ def unicode_urlencode(self, params):
    result = urllib.urlencode([(k, isinstance(v, unicode) and v.encode('utf-8') or v) for k, v in params])
    return result

    def get_and_write_results(self, params):
    response = api.request(params)
    parameters['session_id'] = json.loads(response)['session_id']
    parameters['page'] = 0
    global_total = json.loads(response)['total']

    print "Session id is %s \n" % parameters['session_id']
    print "Here are the # of people %d" % global_total

    paged = self._page_results(response, parameters, global_total)
    self.export_csv("people_export_" + str(int(time.time())) + ".csv", paged)

    def _page_results(self, response, parameters, global_total):

    fname = "people_export_" + str(int(time.time())) + ".txt"
    parameters['page'] = 0
    has_results = True
    total = 0
    while has_results:
    responser = json.loads(response)['results']
    total += len(responser)
    has_results = len(responser) == 1000
    self._write_results(responser, fname)
    print "%d / %d" % (total, global_total)
    parameters['page'] += 1
    if has_results:
    response = api.request(parameters)
    return fname

    def _write_results(self, results, fname):
    with open(fname, 'a') as f:
    for data in results:
    f.write(json.dumps(data) + '\n')

    def export_csv(self, outfilename, fname):
    """
    takes the json and returns a csv file
    """
    '''takes a file name of a file of json objects and the desired name of the csv file that will be written'''
    subkeys = set()
    with open(fname,'rb') as r:
    with open(fname, 'rb') as r:
    with open(outfilename, 'wb') as w:
    # Get all properties (will use this to create the header)
    for line in r:
    for line in r:
    try:
    subkeys.update(set(json.loads(line)['$properties'].keys()))
    except:
    @@ -96,14 +88,14 @@ def export_csv(self, outfilename, fname):
    # Create the header
    header = ['$distinct_id']
    for key in subkeys:
    header.append(key)
    header.append(key.encode('utf-8'))

    # Create the writer and write the header
    writer = csv.writer(w)
    writer.writerow(header)

    #Return to the top of the file, then write the events out, one per row
    r.seek(0,0)
    # Return to the top of the file, then write the events out, one per row
    r.seek(0, 0)
    for line in r:
    entry = json.loads(line)
    row = []
    @@ -121,54 +113,29 @@ def export_csv(self, outfilename, fname):
    row.append("")
    writer.writerow(row)

    api_key = raw_input("API Key: ")
    api_secret = raw_input("API Secret: ")
    api_token = raw_input("API token: ")

    if __name__ == '__main__':
    if len(sys.argv) > 1:
    api_secret = sys.argv[1]
    else:
    api_secret = raw_input("API Secret: ")

    api = Mixpanel(
    api_key = api_key,
    api_secret = api_secret,
    token = api_token,
    )

    '''Here is the place to define your selector to target only the users that you're after
    selector = '(datetime(1458587013 - 86400) > properties["Created"] and behaviors["behavior_79"] > 0'
    behaviors = '[{"window": "90d", "name": "behavior_79", "event_selectors": [{"event": "Edit Colors"}]}]'
    api_secret=api_secret
    )

    '''
    Here is the place to define your selector to target only the users that you're after
    selector = '(datetime(1458587013 - 86400) > properties["Created"] and behaviors["behavior_79"] > 0'
    behaviors = '[{"window": "90d", "name": "behavior_79", "event_selectors": [{"event": "Edit Colors"}]}]'
    '''
    selector = ''
    #Leave 'r' before the behaviors string so that it's interpreted as a string literal to handle escaped quotes
    # Leave 'r' before the behaviors string so that it's interpreted as a string literal to handle escaped quotes
    behaviors = r''

    if not behaviors:
    parameters = {'selector':selector}
    parameters = {'selector': selector}
    else:
    time_offset = int(raw_input("Project time offset from GMT (ex. PST = -8): "))
    parameters = {'selector': selector, 'behaviors': behaviors, 'as_of_timestamp': int(time.time()) + (time_offset * 3600) }

    response = api.request(parameters)

    parameters['session_id'] = json.loads(response)['session_id']
    parameters['page']=0
    global_total = json.loads(response)['total']

    print "Session id is %s \n" % parameters['session_id']
    print "Here are the # of people %d" % global_total
    fname = "people.txt"
    has_results = True
    total = 0
    with open(fname,'w') as f:
    while has_results:
    responser = json.loads(response)['results']
    total += len(responser)
    has_results = len(responser) == 1000
    for data in responser:
    f.write(json.dumps(data)+'\n')
    print "%d / %d" % (total,global_total)
    parameters['page'] += 1
    if has_results:
    response = api.request(parameters)

    '''specify your output filename here'''
    api.export_csv("people_export_"+str(int(time.time()))+".csv", fname)
    parameters = {'selector': selector, 'behaviors': behaviors, 'as_of_timestamp': int(time.time()) + (time_offset * 3600)}

    api.get_and_write_results(parameters)
  4. wgins revised this gist Mar 31, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion people2csv.py
    Original file line number Diff line number Diff line change
    @@ -144,7 +144,7 @@ def export_csv(self, outfilename, fname):
    parameters = {'selector':selector}
    else:
    time_offset = int(raw_input("Project time offset from GMT (ex. PST = -8): "))
    parameters = {'selector': selector, 'behaviors': behaviors, 'as_of_timestamp': int(time.time()) - (time_offset * 3600) }
    parameters = {'selector': selector, 'behaviors': behaviors, 'as_of_timestamp': int(time.time()) + (time_offset * 3600) }

    response = api.request(parameters)

  5. wgins revised this gist Mar 25, 2016. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion people2csv.py
    Original file line number Diff line number Diff line change
    @@ -137,7 +137,8 @@ def export_csv(self, outfilename, fname):
    behaviors = '[{"window": "90d", "name": "behavior_79", "event_selectors": [{"event": "Edit Colors"}]}]'
    '''
    selector = ''
    behaviors = ''
    #Leave 'r' before the behaviors string so that it's interpreted as a string literal to handle escaped quotes
    behaviors = r''

    if not behaviors:
    parameters = {'selector':selector}
  6. Will Ginsberg revised this gist Mar 22, 2016. 1 changed file with 19 additions and 6 deletions.
    25 changes: 19 additions & 6 deletions people2csv.py
    Original file line number Diff line number Diff line change
    @@ -121,17 +121,30 @@ def export_csv(self, outfilename, fname):
    row.append("")
    writer.writerow(row)

    api_key = raw_input("API Key: ")
    api_secret = raw_input("API Secret: ")
    api_token = raw_input("API token: ")

    if __name__ == '__main__':
    api = Mixpanel(
    api_key = raw_input('API Key: ' ),
    api_secret = raw_input('API Secret: '),
    token = raw_input('Token: ')
    api_key = api_key,
    api_secret = api_secret,
    token = api_token,
    )

    '''Here is the place to define your selector to target only the users that you're after'''
    '''parameters = {'selector':'(properties["$email"] == "Albany") or (properties["$city"] == "Alexandria")'}'''
    parameters = {'selector':''}
    '''Here is the place to define your selector to target only the users that you're after
    selector = '(datetime(1458587013 - 86400) > properties["Created"] and behaviors["behavior_79"] > 0'
    behaviors = '[{"window": "90d", "name": "behavior_79", "event_selectors": [{"event": "Edit Colors"}]}]'
    '''
    selector = ''
    behaviors = ''

    if not behaviors:
    parameters = {'selector':selector}
    else:
    time_offset = int(raw_input("Project time offset from GMT (ex. PST = -8): "))
    parameters = {'selector': selector, 'behaviors': behaviors, 'as_of_timestamp': int(time.time()) - (time_offset * 3600) }

    response = api.request(parameters)

    parameters['session_id'] = json.loads(response)['session_id']
  7. @marinamixpanel marinamixpanel revised this gist Mar 15, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion people2csv.py
    Original file line number Diff line number Diff line change
    @@ -28,7 +28,7 @@ def request(self, params, format = 'json'):
    if 'sig' in params: del params['sig']
    params['sig'] = self.hash_args(params)

    request_url = 'http://mixpanel.com/api/2.0/engage/?' + self.unicode_urlencode(params)
    request_url = 'https://mixpanel.com/api/2.0/engage/?' + self.unicode_urlencode(params)

    request = urllib.urlopen(request_url)
    data = request.read()
  8. @marinamixpanel marinamixpanel revised this gist Feb 15, 2016. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions people2csv.py
    Original file line number Diff line number Diff line change
    @@ -124,9 +124,9 @@ def export_csv(self, outfilename, fname):

    if __name__ == '__main__':
    api = Mixpanel(
    api_key = '',
    api_secret = '',
    token = ''
    api_key = raw_input('API Key: ' ),
    api_secret = raw_input('API Secret: '),
    token = raw_input('Token: ')
    )

    '''Here is the place to define your selector to target only the users that you're after'''
  9. @marinamixpanel marinamixpanel created this gist Apr 20, 2015.
    160 changes: 160 additions & 0 deletions people2csv.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,160 @@
    ''' people export'''

    import hashlib
    import time
    import urllib #for url encoding
    import urllib2 #for sending requests
    import base64
    import sys
    import csv

    try:
    import json
    except ImportError:
    import simplejson as json


    class Mixpanel(object):

    def __init__(self, api_key, api_secret, token):
    self.api_key = api_key
    self.api_secret = api_secret
    self.token = token

    def request(self, params, format = 'json'):
    '''let's craft the http request'''
    params['api_key']=self.api_key
    params['expire'] = int(time.time())+600 # 600 is ten minutes from now
    if 'sig' in params: del params['sig']
    params['sig'] = self.hash_args(params)

    request_url = 'http://mixpanel.com/api/2.0/engage/?' + self.unicode_urlencode(params)

    request = urllib.urlopen(request_url)
    data = request.read()

    #print request_url

    return data

    def hash_args(self, args, secret=None):
    '''Hash dem arguments in the proper way
    join keys - values and append a secret -> md5 it'''

    for a in args:
    if isinstance(args[a], list): args[a] = json.dumps(args[a])

    args_joined = ''
    for a in sorted(args.keys()):
    if isinstance(a, unicode):
    args_joined += a.encode('utf-8')
    else:
    args_joined += str(a)

    args_joined += "="

    if isinstance(args[a], unicode):
    args_joined += args[a].encode('utf-8')
    else:
    args_joined += str(args[a])

    hash = hashlib.md5(args_joined)

    if secret:
    hash.update(secret)
    elif self.api_secret:
    hash.update(self.api_secret)
    return hash.hexdigest()

    def unicode_urlencode(self, params):
    ''' Convert stuff to json format and correctly handle unicode url parameters'''

    if isinstance(params, dict):
    params = params.items()
    for i, param in enumerate(params):
    if isinstance(param[1], list):
    params[i] = (param[0], json.dumps(param[1]),)

    result = urllib.urlencode([(k, isinstance(v, unicode) and v.encode('utf-8') or v) for k, v in params])
    return result


    def export_csv(self, outfilename, fname):
    """
    takes the json and returns a csv file
    """
    subkeys = set()
    with open(fname,'rb') as r:
    with open(outfilename, 'wb') as w:
    # Get all properties (will use this to create the header)
    for line in r:
    try:
    subkeys.update(set(json.loads(line)['$properties'].keys()))
    except:
    pass

    # Create the header
    header = ['$distinct_id']
    for key in subkeys:
    header.append(key)

    # Create the writer and write the header
    writer = csv.writer(w)
    writer.writerow(header)

    #Return to the top of the file, then write the events out, one per row
    r.seek(0,0)
    for line in r:
    entry = json.loads(line)
    row = []
    try:
    row.append(entry['$distinct_id'])
    except:
    row.append('')

    for subkey in subkeys:
    try:
    row.append((entry['$properties'][subkey]).encode('utf-8'))
    except AttributeError:
    row.append(entry['$properties'][subkey])
    except KeyError:
    row.append("")
    writer.writerow(row)


    if __name__ == '__main__':
    api = Mixpanel(
    api_key = '',
    api_secret = '',
    token = ''
    )

    '''Here is the place to define your selector to target only the users that you're after'''
    '''parameters = {'selector':'(properties["$email"] == "Albany") or (properties["$city"] == "Alexandria")'}'''
    parameters = {'selector':''}
    response = api.request(parameters)

    parameters['session_id'] = json.loads(response)['session_id']
    parameters['page']=0
    global_total = json.loads(response)['total']

    print "Session id is %s \n" % parameters['session_id']
    print "Here are the # of people %d" % global_total
    fname = "people.txt"
    has_results = True
    total = 0
    with open(fname,'w') as f:
    while has_results:
    responser = json.loads(response)['results']
    total += len(responser)
    has_results = len(responser) == 1000
    for data in responser:
    f.write(json.dumps(data)+'\n')
    print "%d / %d" % (total,global_total)
    parameters['page'] += 1
    if has_results:
    response = api.request(parameters)

    '''specify your output filename here'''
    api.export_csv("people_export_"+str(int(time.time()))+".csv", fname)