Skip to content

Instantly share code, notes, and snippets.

@SavvyGuard
Last active August 29, 2015 14:10
Show Gist options
  • Save SavvyGuard/8ca917d04b1bf94312a2 to your computer and use it in GitHub Desktop.
Save SavvyGuard/8ca917d04b1bf94312a2 to your computer and use it in GitHub Desktop.
Makes requests to the data warehouse and prints the results as JSON.
import subprocess
import argparse
import json
def query(user, host, query):
    """Run a query through beeline on a remote host and print the result as JSON.

    The beeline client is started over ``ssh {user}@{host}`` with
    ``--outputFormat=vertical``.  Every non-blank output line is split on
    whitespace; the first token is used as the key (presumably the column
    name -- confirm against real beeline output) and the remaining tokens
    are recorded as one entry in that key's list.  The mapping is printed
    to stdout as indented JSON with sorted keys.

    Args:
        user: Login name used for the SSH connection.
        host: Host on which ``/opt/spark/bin/beeline`` is invoked.
        query: Query text passed to ``beeline -e``.

    Returns:
        None.  The JSON document is written to stdout; if the remote
        command exits with a non-zero status its stderr is forwarded to
        this process's stderr.
    """
    import sys

    # shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # ssh hands the command string to the remote shell, so the query is
    # shell-quoted; otherwise backticks, `$` or double quotes inside the
    # query would be interpreted by that shell instead of reaching beeline.
    bash_query = (
        '/opt/spark/bin/beeline -u jdbc:hive2://cdh-head-node-001:10000 '
        '-e {query} --silent --outputFormat=vertical'
    ).format(query=quote(query))

    process = subprocess.Popen(
        ['ssh', '{user}@{host}'.format(user=user, host=host), bash_query],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,  # text output on both Python 2 and 3
    )
    # communicate() drains both pipes while waiting.  Calling wait() before
    # reading can deadlock once the child fills a pipe buffer.
    stdout, stderr = process.communicate()
    if process.returncode != 0 and stderr:
        sys.stderr.write(stderr)

    # The final output line is discarded, as in the original implementation
    # (presumably a trailer emitted by beeline -- TODO confirm).
    lines = stdout.splitlines()[:-1]

    clean_results = {}
    for line in lines:
        tokens = line.split()
        if not tokens:
            # Blank or whitespace-only line: nothing to record.
            continue
        # Every occurrence of a key contributes exactly one token list, so
        # the value is always a list of lists regardless of how many times
        # the key appears.
        clean_results.setdefault(tokens[0], []).append(tokens[1:])

    result_string = json.dumps(
        clean_results,
        sort_keys=True,
        indent=4,
        separators=(',', ': ')
    )
    # Parenthesised single-argument form works as both the Python 2 print
    # statement and the Python 3 print function.
    print(result_string)
def execute():
    """Parse the command-line options and run the requested query.

    Reads ``-u/--user``, ``-t/--host`` and ``-q/--query`` from the command
    line and passes them to ``query``.  All three options are required:
    when one is missing argparse prints a usage error and exits with
    status 2 instead of passing ``None`` on to the SSH command.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-u", "--user", required=True,
        help="set username for beeline machine")
    parser.add_argument(
        "-t", "--host", required=True,
        help="set host for beeline machine")
    parser.add_argument(
        "-q", "--query", required=True,
        help="set the query to run")
    args = parser.parse_args()
    query(args.user, args.host, args.query)
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    execute()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment