"""Compare how often apology words show up in Canadian subreddits vs. r/all."""

import logging

import praw

logger = logging.getLogger(__name__)

# Canadian subreddits to sample.
CANADA_SUBREDDITS = [
    'canada', 'alberta', 'britishcolumbia', 'Manitoba', 'NewBrunswickCanada',
    'newfoundland', 'NovaScotia', 'nunavut', 'NWT', 'ontario', 'PEI',
    'saskatchewan', 'Yukon',
]

# Number of comments requested for each sample (all Canadian subreddits
# combined, and r/all on its own).
SAMPLE_SIZE = 1000


def get_subreddit_comments(reddit_agent, subreddit, comments_out=None, count=100):
    """Append comment bodies fetched from one subreddit to ``comments_out``.

    Fetching is best-effort: a failure to fetch the listing is logged and
    leaves ``comments_out`` unchanged; a failure on a single item is logged
    at debug level and that item is skipped.

    Args:
        reddit_agent: Object providing ``get_subreddit(name)``, e.g. the
            session returned by ``get_reddit_agent``.
        subreddit: Name of the subreddit to read.
        comments_out: List that receives the comment bodies. A new list is
            created when omitted.
        count: Value passed as ``limit`` to the comment listing.

    Returns:
        The list the bodies were appended to (``comments_out`` when given).
    """
    # A fresh list per call; a list literal as the default would be shared
    # between every call that omits the argument.
    if comments_out is None:
        comments_out = []

    try:
        sub = reddit_agent.get_subreddit(subreddit)
        # Called on the subreddit object, so the subreddit is not passed again.
        # NOTE(review): in PRAW 3 an extra positional argument here appears to
        # bind to ``gilded_only`` -- confirm against the installed version.
        comments_raw = sub.get_comments(limit=count)
        comments_flat = praw.helpers.flatten_tree(comments_raw)
    except Exception:
        # Keep going with the other subreddits, but leave a trace.
        logger.warning("Could not fetch comments from %r", subreddit,
                       exc_info=True)
        return comments_out

    for comment in comments_flat:
        try:
            if hasattr(comment, 'comments'):
                # Items exposing ``comments`` contribute their nested bodies.
                for reply in comment.comments:
                    comments_out.append(reply.body)
            else:
                comments_out.append(comment.body)
        except Exception:
            # One unreadable item must not discard the rest of the listing.
            logger.debug("Skipping unreadable item in %r", subreddit,
                         exc_info=True)
    return comments_out


def get_reddit_agent(user_agent, client_id, client_secret,
                     redirect='http://127.0.0.1'):
    """Create a ``praw.Reddit`` session and register OAuth app info on it.

    Args:
        user_agent: User-agent string for the session.
        client_id: OAuth client id of the registered application.
        client_secret: OAuth client secret of the registered application.
        redirect: OAuth redirect URI.

    Returns:
        The configured ``praw.Reddit`` instance.
    """
    # Use the caller's user agent instead of a hard-coded one.
    reddit_agent = praw.Reddit(user_agent=user_agent)
    reddit_agent.set_oauth_app_info(client_id=client_id,
                                    client_secret=client_secret,
                                    redirect_uri=redirect)
    return reddit_agent


def get_sorry_and_word_count(comment_list, apologies=('sorry', 'apologies')):
    """Count apology occurrences and total words across comments.

    Words are whitespace-separated tokens. Matching is case-insensitive and
    substring based, so a token such as "sorry!!" still counts once and
    "sorrysorry" counts twice.

    Args:
        comment_list: Iterable of comment strings.
        apologies: Iterable of apology terms to look for; empty terms are
            ignored.

    Returns:
        A ``(sorry_count, word_count)`` tuple.
    """
    # Normalise once; an empty term would match at every position of a word.
    terms = [apology.lower() for apology in apologies if apology]

    sorry_count = 0
    word_count = 0
    for comment in comment_list:
        # split() without arguments collapses runs of whitespace, so repeated
        # spaces and empty comments do not produce phantom words.
        words = comment.lower().split()
        word_count += len(words)
        for word in words:
            sorry_count += sum(word.count(term) for term in terms)
    return sorry_count, word_count


def _percentage(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when ``whole`` is 0."""
    if not whole:
        return 0.0
    return float(part) / whole * 100


def main():
    """Sample Canadian subreddits and r/all, then print apology statistics."""
    logging.basicConfig(level=logging.WARNING)

    client_id = 'your_client_id'
    client_secret = 'your_client_secret'
    reddit_agent = get_reddit_agent('custom name for app', client_id,
                                    client_secret)

    # Split the sample evenly; floor division keeps the limit an integer.
    per_subreddit = SAMPLE_SIZE // len(CANADA_SUBREDDITS)
    canada_comments = []
    for subreddit in CANADA_SUBREDDITS:
        get_subreddit_comments(reddit_agent, subreddit, canada_comments,
                               count=per_subreddit)
    canada_sorry, canada_total_words = get_sorry_and_word_count(canada_comments)

    # 'all' stands in for the global sample, so only one subreddit is read.
    world_comments = []
    get_subreddit_comments(reddit_agent, 'all', world_comments,
                           count=SAMPLE_SIZE)
    world_sorry, world_total_words = get_sorry_and_word_count(world_comments)

    print("Canada {} / {}, All {} / {}".format(
        canada_sorry, canada_total_words, world_sorry, world_total_words))
    print("Canada {}% sorry, World {}%".format(
        _percentage(canada_sorry, canada_total_words),
        _percentage(world_sorry, world_total_words)))


if __name__ == "__main__":
    main()