Skip to content

Instantly share code, notes, and snippets.

@lalitkale
Forked from Chandler/slack_history.py
Created October 3, 2021 15:25
Show Gist options
  • Select an option

  • Save lalitkale/fdfe17d48e723542e5b1462150b150ec to your computer and use it in GitHub Desktop.

Select an option

Save lalitkale/fdfe17d48e723542e5b1462150b150ec to your computer and use it in GitHub Desktop.

Revisions

  1. @Chandler Chandler revised this gist Dec 3, 2016. 1 changed file with 25 additions and 1 deletion.
    26 changes: 25 additions & 1 deletion slack_history.py
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,30 @@
    # MIT License

    # Copyright (c) 2016 Chandler Abraham

    # Permission is hereby granted, free of charge, to any person obtaining a copy
    # of this software and associated documentation files (the "Software"), to deal
    # in the Software without restriction, including without limitation the rights
    # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    # copies of the Software, and to permit persons to whom the Software is
    # furnished to do so, subject to the following conditions:

    # The above copyright notice and this permission notice shall be included in all
    # copies or substantial portions of the Software.

    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    # SOFTWARE.

    from slacker import Slacker
    import json
    import argparse
    import os

    # This script finds all channels, private channels and direct messages
    # that your user participates in, downloads the complete history for
    # those conversations and writes each conversation out to separate json files.
    @@ -20,7 +43,7 @@
    # https://api.slack.com/web
    #
    # dependencies:
    # pip install slacker #https://github.com/os/slacker
    # pip install slacker # https://github.com/os/slacker
    #
    # usage examples
    # python slack_history.py --token='123token'
    @@ -37,6 +60,7 @@
    # slack.im
    #
    # channelId is the id of the channel/group/im you want to download history for.

    def getHistory(pageableObject, channelId, pageSize = 100):
    messages = []
    lastTimestamp = None
  2. @Chandler Chandler revised this gist Jan 3, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion slack_history.py
    Original file line number Diff line number Diff line change
    @@ -199,4 +199,4 @@ def doTestAuth(slack):
    getPrivateChannels(slack, dryRun)

    if not args.skipDirectMessages:
    getDirectMessages(slack, testAuth['user_id'], userIdNameMap, dryRun)
    getDirectMessages(slack, testAuth['user_id'], userIdNameMap, dryRun)
  3. @Chandler Chandler revised this gist Dec 23, 2015. 1 changed file with 33 additions and 14 deletions.
    47 changes: 33 additions & 14 deletions slack_history.py
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,7 @@
    from slacker import Slacker
    import json
    import argparse

    import os
    # This script finds all channels, private channels and direct messages
    # that your user participates in, downloads the complete history for
    # those conversations and writes each conversation out to separate json files.
    @@ -57,6 +57,10 @@ def getHistory(pageableObject, channelId, pageSize = 100):
    break
    return messages

    def mkdir(directory):
        """Create `directory` (and any missing parents) unless it already exists.

        Attempts the creation first and inspects the failure afterwards, so a
        directory that appears between a check and the create call cannot cause
        a spurious error.

        Raises OSError when the directory cannot be created and does not already
        exist as a directory (for example, a regular file occupies the path).
        """
        try:
            os.makedirs(directory)
        except OSError:
            # an already-existing directory is the only failure we tolerate
            if not os.path.isdir(directory):
                raise

    # fetch and write history for all public channels
    def getChannels(slack, dryRun):
    channels = slack.channels.list().body['channels']
    @@ -66,51 +70,61 @@ def getChannels(slack, dryRun):
    print(channel['name'])

    if not dryRun:
    parentDir = "channels"
    mkdir(parentDir)
    for channel in channels:
    print("getting history for channel {0}".format(channel['name']))
    fileName = "{parent}/{file}.json".format(parent = parentDir, file = channel['name'])
    messages = getHistory(slack.channels, channel['id'])
    fileName = channel['name'] + ".json"
    channelInfo = slack.channels.info(channel['id']).body['channel']
    with open(fileName, 'w') as outFile:
    print("writing {0} records to {1}".format(len(messages), fileName))
    json.dump(messages, outFile, indent=4)
    json.dump({'channel_info': channelInfo, 'messages': messages }, outFile, indent=4)

    # fetch and write history for all direct message conversations
    # also known as IMs in the slack API.
    def getDirectMessages(slack, userIdNameMap, dryRun):
    def getDirectMessages(slack, ownerId, userIdNameMap, dryRun):
    dms = slack.im.list().body['ims']

    print("\nfound direct messages (1:1) with the following users:")
    for dm in dms:
    print(userIdNameMap.get(dm['user'], dm['user'] + " (name unknown)"))

    if not dryRun:
    parentDir = "direct_messages"
    mkdir(parentDir)
    for dm in dms:
    name = userIdNameMap.get(dm['user'], dm['user'] + " (name unknown)")
    print("getting history for direct messages with {0}".format(name))
    fileName = "{parent}/{file}.json".format(parent = parentDir, file = name)
    messages = getHistory(slack.im, dm['id'])
    fileName = name + ".json"
    channelInfo = {'members': [dm['user'], ownerId]}
    with open(fileName, 'w') as outFile:
    print("writing {0} records to {1}".format(len(messages), fileName))
    json.dump(messages, outFile, indent=4)
    json.dump({'channel_info': channelInfo, 'messages': messages}, outFile, indent=4)

    # fetch and write history for all private channels
    # also known as groups in the slack API.
    def getPrivateChannels(slack, dryRun):
    groups = slack.groups.list().body['groups']

    print("\nfound groups:")
    print("\nfound private channels:")
    for group in groups:
    print("{0}: ({1} members)".format(group['name'], len(group['members'])))

    if not dryRun:
    parentDir = "private_channels"
    mkdir(parentDir)

    for group in groups:
    messages = []
    fileName = group['name'] + ".json"
    print("getting history for group {0} with id {1}".format(group['name'], group['id']))
    print("getting history for private channel {0} with id {1}".format(group['name'], group['id']))
    fileName = "{parent}/{file}.json".format(parent = parentDir, file = group['name'])
    messages = getHistory(slack.groups, group['id'])
    channelInfo = slack.groups.info(group['id']).body['group']
    with open(fileName, 'w') as outFile:
    print("writing {0} records to {1}".format(len(messages), fileName))
    json.dump(messages, outFile, indent=4)
    json.dump({'channel_info': channelInfo, 'messages': messages}, outFile, indent=4)

    # fetch all users for the channel and return a map userId -> userName
    def getUserMap(slack):
    @@ -128,6 +142,7 @@ def doTestAuth(slack):
    teamName = testAuth['team']
    currentUser = testAuth['user']
    print("Successfully authenticated for team {0} and user {1} ".format(teamName, currentUser))
    return testAuth

    if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='download slack history')
    @@ -169,9 +184,13 @@ def doTestAuth(slack):
    dryRun = args.dryRun

    if not dryRun:
    with open('user_id_to_username_map.json', 'w') as outFile:
    print("writing userid to username mapping")
    json.dump(userIdNameMap, outFile, indent=4)
    with open('metadata.json', 'w') as outFile:
    print("writing metadata")
    metadata = {
    'auth_info': testAuth,
    'users': userIdNameMap
    }
    json.dump(metadata, outFile, indent=4)

    if not args.skipChannels:
    getChannels(slack, dryRun)
    @@ -180,4 +199,4 @@ def doTestAuth(slack):
    getPrivateChannels(slack, dryRun)

    if not args.skipDirectMessages:
    getDirectMessages(slack, userIdNameMap, dryRun)
    getDirectMessages(slack, testAuth['user_id'], userIdNameMap, dryRun)
  4. @Chandler Chandler revised this gist Dec 22, 2015. 1 changed file with 5 additions and 0 deletions.
    5 changes: 5 additions & 0 deletions slack_history.py
    Original file line number Diff line number Diff line change
    @@ -168,6 +168,11 @@ def doTestAuth(slack):

    dryRun = args.dryRun

    if not dryRun:
    with open('user_id_to_username_map.json', 'w') as outFile:
    print("writing userid to username mapping")
    json.dump(userIdNameMap, outFile, indent=4)

    if not args.skipChannels:
    getChannels(slack, dryRun)

  5. @Chandler Chandler revised this gist Dec 22, 2015. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions slack_history.py
    Original file line number Diff line number Diff line change
    @@ -72,7 +72,7 @@ def getChannels(slack, dryRun):
    fileName = channel['name'] + ".json"
    with open(fileName, 'w') as outFile:
    print("writing {0} records to {1}".format(len(messages), fileName))
    json.dump(messages, outFile)
    json.dump(messages, outFile, indent=4)

    # fetch and write history for all direct message conversations
    # also known as IMs in the slack API.
    @@ -91,7 +91,7 @@ def getDirectMessages(slack, userIdNameMap, dryRun):
    fileName = name + ".json"
    with open(fileName, 'w') as outFile:
    print("writing {0} records to {1}".format(len(messages), fileName))
    json.dump(messages, outFile)
    json.dump(messages, outFile, indent=4)

    # fetch and write history for all private channels
    # also known as groups in the slack API.
    @@ -110,7 +110,7 @@ def getPrivateChannels(slack, dryRun):
    messages = getHistory(slack.groups, group['id'])
    with open(fileName, 'w') as outFile:
    print("writing {0} records to {1}".format(len(messages), fileName))
    json.dump(messages, outFile)
    json.dump(messages, outFile, indent=4)

    # fetch all users for the channel and return a map userId -> userName
    def getUserMap(slack):
  6. @Chandler Chandler created this gist Dec 20, 2015.
    178 changes: 178 additions & 0 deletions slack_history.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,178 @@
    from slacker import Slacker
    import json
    import argparse

    # This script finds all channels, private channels and direct messages
    # that your user participates in, downloads the complete history for
    # those conversations and writes each conversation out to separate json files.
    #
    # This user centric history gathering is nice because the official slack data exporter
    # only exports public channels.
    #
    # PS, this only works if your slack team has a paid account which allows for unlimited history.
    #
    # PPS, this use of the API is blessed by Slack.
    # https://get.slack.help/hc/en-us/articles/204897248
    # " If you want to export the contents of your own private groups and direct messages
    # please see our API documentation."
    #
    # get your slack user token at the bottom of this page
    # https://api.slack.com/web
    #
    # dependencies:
    # pip install slacker #https://github.com/os/slacker
    #
    # usage examples
    # python slack_history.py --token='123token'
    # python slack_history.py --token='123token' --dryRun
    # python slack_history.py --token='123token' --skipDirectMessages
    # python slack_history.py --token='123token' --skipDirectMessages --skipPrivateChannels


    # fetches the complete message history for a channel/group/im
    #
    # pageableObject could be:
    # slack.channels
    # slack.groups
    # slack.im
    #
    # channelId is the id of the channel/group/im you want to download history for.
    def getHistory(pageableObject, channelId, pageSize = 100):
        """Collect every message of one conversation by paging through its history.

        pageableObject -- object exposing history(channel=, latest=, oldest=, count=)
                          whose result has a `.body` mapping with a 'messages' list
                          and a 'has_more' flag (the callers in this script pass
                          slack.channels, slack.groups or slack.im).
        channelId      -- id of the channel/group/im to download.
        pageSize       -- number of messages requested per call.

        Returns the list of message dicts in the order the pages delivered them.
        """
        messages = []
        lastTimestamp = None

        while True:
            response = pageableObject.history(
                channel = channelId,
                latest = lastTimestamp,
                oldest = 0,
                count = pageSize
            ).body

            page = response['messages']
            messages.extend(page)

            # Stop on the final page. Also stop when a page comes back empty even
            # though more is advertised: there would be no new timestamp to
            # continue from, so the same request would repeat forever.
            if not response.get('has_more') or not page:
                break

            # the next request continues from the last message received so far
            lastTimestamp = page[-1]['ts']
        return messages

    # fetch and write history for all public channels
    def getChannels(slack, dryRun):
        """List every public channel and, unless dryRun is set, dump each one's history.

        The channel names are always printed. When dryRun is false, each
        channel's messages are fetched with getHistory and written as JSON to
        "<channel name>.json" in the current working directory.
        """
        channelList = slack.channels.list().body['channels']

        print("\nfound channels: ")
        for entry in channelList:
            print(entry['name'])

        # a dry run only reports what was found
        if dryRun:
            return

        for entry in channelList:
            channelName = entry['name']
            print("getting history for channel {0}".format(channelName))
            history = getHistory(slack.channels, entry['id'])
            outPath = channelName + ".json"
            with open(outPath, 'w') as outFile:
                print("writing {0} records to {1}".format(len(history), outPath))
                json.dump(history, outFile)

    # fetch and write history for all direct message conversations
    # also known as IMs in the slack API.
    def getDirectMessages(slack, userIdNameMap, dryRun):
        """List every direct-message conversation and, unless dryRun is set, dump each one.

        userIdNameMap maps user id -> user name; a user id missing from the map
        is shown (and used as the file name) as "<id> (name unknown)". When
        dryRun is false, each conversation's messages are fetched with
        getHistory and written as JSON to "<name>.json" in the current working
        directory.
        """
        conversations = slack.im.list().body['ims']

        def partnerName(conversation):
            # resolve the other participant's name, falling back to the raw id
            partnerId = conversation['user']
            return userIdNameMap.get(partnerId, partnerId + " (name unknown)")

        print("\nfound direct messages (1:1) with the following users:")
        for conversation in conversations:
            print(partnerName(conversation))

        # a dry run only reports what was found
        if dryRun:
            return

        for conversation in conversations:
            partner = partnerName(conversation)
            print("getting history for direct messages with {0}".format(partner))
            history = getHistory(slack.im, conversation['id'])
            outPath = partner + ".json"
            with open(outPath, 'w') as outFile:
                print("writing {0} records to {1}".format(len(history), outPath))
                json.dump(history, outFile)

    # fetch and write history for all private channels
    # also known as groups in the slack API.
    def getPrivateChannels(slack, dryRun):
        """List every private channel (group) and, unless dryRun is set, dump each one.

        Each group's name and member count are always printed. When dryRun is
        false, each group's messages are fetched with getHistory and written as
        JSON to "<group name>.json" in the current working directory.
        """
        groupList = slack.groups.list().body['groups']

        print("\nfound groups:")
        for entry in groupList:
            memberCount = len(entry['members'])
            print("{0}: ({1} members)".format(entry['name'], memberCount))

        # a dry run only reports what was found
        if dryRun:
            return

        for entry in groupList:
            groupName = entry['name']
            groupId = entry['id']
            outPath = groupName + ".json"
            print("getting history for group {0} with id {1}".format(groupName, groupId))
            history = getHistory(slack.groups, groupId)
            with open(outPath, 'w') as outFile:
                print("writing {0} records to {1}".format(len(history), outPath))
                json.dump(history, outFile)

    # fetch all users for the channel and return a map userId -> userName
    def getUserMap(slack):
        """Return a dict mapping user id -> user name for everything users.list reports.

        Also prints how many user records were received.
        """
        members = slack.users.list().body['members']
        idToName = {member['id']: member['name'] for member in members}
        print("found {0} users ".format(len(members)))
        return idToName

    # get basic info about the slack channel to ensure the authentication token works
    def doTestAuth(slack):
        """Call auth.test to confirm the token works and report who we are.

        Prints the team and user names found in the response and returns the
        whole response body, so the caller can use the value it assigns from
        this call.
        """
        testAuth = slack.auth.test().body
        teamName = testAuth['team']
        currentUser = testAuth['user']
        print("Successfully authenticated for team {0} and user {1} ".format(teamName, currentUser))
        return testAuth

    if __name__ == "__main__":
        # Command-line entry point: parse the flags, authenticate, build the
        # user id -> name map, then export each kind of conversation unless
        # its --skip* flag was given.
        parser = argparse.ArgumentParser(description='download slack history')

        # Nothing below can work without a token, so let argparse reject a
        # missing one with a usage message instead of failing later on.
        parser.add_argument(
            '--token',
            required=True,
            help="an api token for a slack user")

        parser.add_argument(
            '--dryRun',
            action='store_true',
            default=False,
            help="if dryRun is true, don't fetch/write history only get channel names")

        parser.add_argument(
            '--skipPrivateChannels',
            action='store_true',
            default=False,
            help="skip fetching history for private channels")

        parser.add_argument(
            '--skipChannels',
            action='store_true',
            default=False,
            help="skip fetching history for channels")

        parser.add_argument(
            '--skipDirectMessages',
            action='store_true',
            default=False,
            help="skip fetching history for directMessages")

        args = parser.parse_args()

        slack = Slacker(args.token)

        testAuth = doTestAuth(slack)

        userIdNameMap = getUserMap(slack)

        dryRun = args.dryRun

        if not args.skipChannels:
            getChannels(slack, dryRun)

        if not args.skipPrivateChannels:
            getPrivateChannels(slack, dryRun)

        if not args.skipDirectMessages:
            getDirectMessages(slack, userIdNameMap, dryRun)