def test(method, bits, random_top_layer, quantize_top_layer, results=None):
    """Time one end-to-end run of the selected method and record the metrics.

    Prints the module-level ``prompt``, runs either ``test_vanilla`` or
    ``test_eagle``, then stores the measurements in ``results``.

    Args:
        method: ``'vanilla'`` or ``'eagle'``; selects which runner is called.
        bits: Forwarded unchanged to the selected runner.
        random_top_layer: Forwarded to ``test_eagle`` only.
        quantize_top_layer: Forwarded to ``test_eagle`` only.
        results: Mutable mapping updated in place with the keys
            ``time_delta``, ``cnt_tokens`` and ``speed``. Pass a
            ``multiprocessing.Manager().dict()`` to read the values back from
            another process. When omitted, a private throwaway dict is used.

    Raises:
        ValueError: If ``method`` is neither ``'vanilla'`` nor ``'eagle'``.
    """
    # A fresh dict per call: a ``{}`` default would be shared by every call.
    if results is None:
        results = {}

    print(prompt)
    start_time = time.time()
    if method == 'vanilla':
        cnt_tokens = test_vanilla(bits)
    elif method == 'eagle':
        cnt_tokens = test_eagle(
            bits,
            random_top_layer=random_top_layer,
            quantize_top_layer=quantize_top_layer,
        )
    else:
        # Previously an unknown method fell through and failed later with an
        # UnboundLocalError on ``cnt_tokens``; fail with a clear message.
        raise ValueError(
            f"unknown method {method!r}; expected 'vanilla' or 'eagle'"
        )
    time_delta = time.time() - start_time
    # presumably cnt_tokens is the number of generated tokens, making
    # ``speed`` tokens per second -- confirm against the runners.
    speed = cnt_tokens / time_delta
    print('e2e speed:', time_delta, cnt_tokens, speed)
    results.update(dict(time_delta=time_delta, cnt_tokens=cnt_tokens, speed=speed))


if __name__ == '__main__':
    import os
    import argparse
    import pandas as pd
    import multiprocessing
    import threading
    from colorama import Fore, Back, Style

    parser = argparse.ArgumentParser(description='Pandas Fire ArgumentParser')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    # The manager provides dict proxies so a worker process can hand its
    # measurements back to this parent process.
    manager = multiprocessing.Manager()

    # Each row of the TSV is one benchmark configuration; its columns are
    # passed to ``test`` as keyword arguments.
    df_params = pd.read_csv('params-awq.tsv', sep='\t', header=0)
    # Empty TSV cells are read as NaN; turn them into None for ``test``.
    df_params = df_params.replace({float('nan'): None})

    # Loop-invariant, so set once before any worker is started.
    os.environ["TOKENIZERS_PARALLELISM"] = "true"

    df_results = []
    for params in df_params.to_dict(orient='records'):
        print(Fore.RED, Back.YELLOW, params, Style.RESET_ALL)
        # Reset per iteration so the handler below never touches a stale or
        # undefined worker.
        process = None
        try:
            params['results'] = manager.dict()
            if args.debug:
                # --debug runs the benchmark in a thread of this process
                # instead of a separate process.
                process = threading.Thread(target=test, kwargs=params)
            else:
                process = multiprocessing.Process(target=test, kwargs=params)
            process.start()
            process.join()
        except (KeyboardInterrupt, Exception) as exc:
            # Best effort: report the problem, stop the sweep, and still write
            # out the rows that were already collected.
            print(Fore.RED, 'benchmark sweep aborted:', repr(exc), Style.RESET_ALL)
            # Only a real Process can be terminated; a Thread has no
            # terminate(), and the worker may not have been created at all.
            if process is not None and not args.debug:
                process.terminate()
            break
        # Copy the proxy into a plain dict; it stays empty if the worker
        # failed before recording anything.
        results = dict(params['results'])
        print(Fore.RED, Back.YELLOW, results, Style.RESET_ALL, end='\n\n')
        df_results.append(results)

    df_results = pd.DataFrame(df_results)
    # Index-aligned join: parameter rows with no result get NaN metrics.
    df_output = df_params.join(df_results)
    print(df_output)
    df_output.to_csv('output-awq.tsv', sep='\t', index=False)