Created
August 23, 2021 18:38
-
-
Save pabloem/845d96e5142fbbbd1edd8512ac416fdf to your computer and use it in GitHub Desktop.
Revisions
-
pabloem created this gist
Aug 23, 2021. There are no files selected for viewing
# ======================================================================
# Reconstructed from a scraped gist page that had collapsed three files
# (a README and two Python scripts) onto three physical lines.
# The two scripts are independent programs: split this text at the
# "File" banners before running them.
# ======================================================================
#
# ----------------------------------------------------------------------
# File 1 of 3: README (Markdown, translated from the Spanish original)
# ----------------------------------------------------------------------
#
# ## Tail At Scale
#
# Code to demonstrate latency distributions in a software service using
# different algorithms.
#
# **To install the environment on Linux/Unix**:
#
#     virtualenv venv
#     . venv/bin/activate
#     pip install -r requirements.txt
#
# **To run:**
#
# In one window:
#
#     python search.py
#
# In another window:
#
#     python measure.py
#
# Check `out.csv` to see the latencies and analyze them.
#
# ----------------------------------------------------------------------
# File 2 of 3: latency-measuring client
# (presumably the README's measure.py -- confirm against the gist)
# ----------------------------------------------------------------------
import argparse
import csv
import logging
import random

import requests

HOST = '127.0.0.1'
PORT = '5000'

# Endpoint names exposed by the server; also used as the CSV column names.
PATHS = [
    'binary',
    'hash',
    'list',
]


def measure_distribution(word_list):
    """Yield one latency sample per word in *word_list*.

    Each word is requested once from every endpoint in ``PATHS``; the
    endpoints are visited in a freshly randomized order for every word.

    Yields:
        A dict mapping each endpoint name to the elapsed time of its
        request in milliseconds, as reported by ``response.elapsed``.
    """
    req_count = 0
    for word in word_list:
        word_results = {}
        # Sample all endpoints (not a hard-coded 3) so PATHS can grow or
        # shrink without raising ValueError or silently skipping entries.
        for path in random.sample(PATHS, len(PATHS)):
            full_path = f'http://{HOST}:{PORT}/{path}/{word}'
            req_count += 1
            response = requests.get(full_path)
            word_results[path] = response.elapsed.total_seconds() * 1000
            # NOTE(review): the scraped source lost its indentation; the
            # progress log is assumed to sit inside the per-request loop.
            if req_count % 1000 == 0:
                logging.info('Ran %s requests', req_count)
        yield word_results


def save_distribution(word_list, fname):
    """Measure latencies for *word_list* and write them to CSV file *fname*.

    The file gets a header row with the names in ``PATHS`` followed by one
    row per word.
    """
    # newline='' is required by the csv module; without it every row is
    # followed by a blank line on platforms that translate '\n' to '\r\n'.
    with open(fname, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=PATHS)
        writer.writeheader()
        for sample_row in measure_distribution(word_list):
            writer.writerow(sample_row)


def read_words(fname):
    """Return the words listed in *fname*, in random order.

    Each line contributes the text before its first ``/``, stripped of
    surrounding whitespace. Lines that yield an empty word are skipped,
    because an empty word cannot form a valid request path.
    """
    result = []
    logging.info('Loading directory from %s', fname)
    with open(fname, 'r') as f:
        for line in f:
            word = line.split('/')[0].strip()
            if word:
                result.append(word)
    random.shuffle(result)
    return result


parser = argparse.ArgumentParser(description='Client to gather latency data')
parser.add_argument('--input',
                    help='File with list of words',
                    default='zdict_corto.dic')
parser.add_argument('--output',
                    help='CSV file to write latency data',
                    default='out.csv')


def run(args):
    """Load the words from ``args.input`` and save latencies to ``args.output``."""
    logging.getLogger().setLevel('INFO')
    word_list = read_words(args.input)
    logging.info('Saving distribution over %d words', len(word_list))
    save_distribution(word_list, args.output)


if __name__ == '__main__':
    args = parser.parse_args()
    run(args)


# ----------------------------------------------------------------------
# File 3 of 3: search server
# (presumably the README's search.py -- confirm against the gist)
# ----------------------------------------------------------------------
import argparse
import json
import logging

from flask import Flask

parser = argparse.ArgumentParser(description='Server to demonstrate latency')
parser.add_argument('--dictionary',
                    help='File with list of words',
                    default='zdict_largo.dic')

# Filled by _load_dicts(): the same words as a sorted list and as a set.
LIST_DIRECTORY = []
SET_DIRECTORY = set()


def _binary_search(directory, start, end, word):
    """Return True if *word* occurs in the sorted ``directory[start:end]``.

    ``end`` is exclusive. Both bounds are clamped to the valid index range,
    so an empty directory or an ``end`` past the last element returns False
    instead of raising IndexError.
    """
    lo = max(start, 0)
    hi = min(end, len(directory))
    while lo < hi:
        mid = (lo + hi) // 2
        candidate = directory[mid]
        if candidate == word:
            return True
        if candidate < word:
            lo = mid + 1
        else:
            hi = mid
    return False


def _list_search(directory: list, word):
    """Return True if *word* is in *directory* (list membership test)."""
    return word in directory


def _hash_search(directory: set, word):
    """Return True if *word* is in *directory* (set membership test)."""
    return word in directory


def _load_dicts(fname):
    """Fill LIST_DIRECTORY and SET_DIRECTORY with the words in *fname*.

    Each line contributes the text before its first ``/``, stripped of
    surrounding whitespace; lines that yield an empty word are skipped.
    LIST_DIRECTORY is sorted afterwards, as the binary search requires.
    """
    logging.info('Loading directory from %s', fname)
    with open(fname, 'r') as f:
        for line in f:
            word = line.split('/')[0].strip()
            if not word:
                continue
            LIST_DIRECTORY.append(word)
            SET_DIRECTORY.add(word)
    LIST_DIRECTORY.sort()
    logging.info('Directory has %d/%d words. Sample:\n%s',
                 len(LIST_DIRECTORY),
                 len(SET_DIRECTORY),
                 LIST_DIRECTORY[0:10])


app = Flask(__name__)


@app.route("/binary/<word>")
def serve_binary(word):
    """Look *word* up with binary search over the sorted list."""
    # The upper bound is exclusive, so it is len(), not len() + 1; the
    # extra 1 made the search index past the end for words sorting last.
    return json.dumps({
        'word': word,
        'found': _binary_search(LIST_DIRECTORY, 0, len(LIST_DIRECTORY), word)})


@app.route("/list/<word>")
def serve_list(word):
    """Look *word* up with a membership test on the list."""
    return json.dumps({
        'word': word,
        'found': _list_search(LIST_DIRECTORY, word)})


@app.route("/hash/<word>")
def serve_hash(word):
    """Look *word* up with a membership test on the set."""
    return json.dumps({
        'word': word,
        'found': _hash_search(SET_DIRECTORY, word)})


if __name__ == '__main__':
    logging.getLogger().setLevel('WARNING')
    logging.getLogger('werkzeug').setLevel('ERROR')
    args = parser.parse_args()
    _load_dicts(args.dictionary)
    app.run()