#!/usr/bin/python
"""Run a Hive query through beeline on a remote host (via ssh) and print the
result rows as pretty-printed JSON."""
import subprocess
import argparse
import json


def query(user, host, query):
    """Execute *query* with beeline on *host*, connecting over ssh as *user*.

    The beeline output (vertical format) is parsed into a dict that maps the
    first whitespace-separated token of each row to a list of the remaining
    tokens, accumulated across rows sharing the same leading token. The dict
    is printed as indented JSON and returned.

    NOTE(security): *query* is interpolated into a shell command executed on
    the remote machine -- do not pass untrusted input.
    """
    bash_query = (
        '/opt/spark/bin/beeline -u jdbc:hive2://dp-cluster-master-node-001:10000'
        ' -e "{query}" --silent --outputFormat=vertical'.format(query=query)
    )
    process = subprocess.Popen(
        ['ssh', '{user}@{host}'.format(user=user, host=host), bash_query],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,  # text-mode rows on both Python 2 and 3
    )
    # communicate() drains both pipes while waiting for the process to exit.
    # The original wait() + readlines() sequence can deadlock once the child's
    # output fills the OS pipe buffer.
    stdout, _ = process.communicate()
    raw = stdout.splitlines(True)

    clean_results = {}
    # Skip blank separator lines and the trailing status line (raw[-1]),
    # grouping each row's values under its leading token.
    for result in filter(lambda x: x != '\n', raw[0:-1]):
        result_list = result.split()
        try:
            clean_results[result_list[0]].extend(result_list[1:])
        except KeyError:
            clean_results[result_list[0]] = result_list[1:]

    # BUG FIX: the original passed a stray dict ({'4': 5, '6': 7}) as
    # json.dumps' second positional argument (skipkeys); it has been dropped.
    result_string = json.dumps(
        clean_results, sort_keys=True, indent=4, separators=(',', ': ')
    )
    print(result_string)
    return clean_results


def execute():
    """Parse command-line arguments and run the requested query."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--user", help="set username for beeline machine")
    parser.add_argument("-t", "--host", help="set host for beeline machine")
    parser.add_argument("-q", "--query", help="set the query to run")
    args = parser.parse_args()
    query(args.user, args.host, args.query)


if __name__ == '__main__':
    execute()