@pabloem
Created August 23, 2021 18:38
README.md
## Tail At Scale

Code to demonstrate latency distributions in a software service
using different lookup algorithms.


**To set up the environment on Linux/Unix**:

    ```
    virtualenv venv
    . venv/bin/activate
    pip install -r requirements.txt
    ```

**To run:**

In one window:

    ```
    python search.py
    ```

In another window:

    ```
    python measure.py
    ```

Inspect `out.csv` to view and analyze the recorded latencies.
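
To summarize the distributions, here is a minimal sketch using only the standard library (it assumes `out.csv` was produced by `measure.py`, with one latency column per algorithm):

```
import csv
import statistics

# Collect the per-request latencies (in milliseconds) per algorithm.
latencies = {'binary': [], 'hash': [], 'list': []}
with open('out.csv') as f:
    for row in csv.DictReader(f):
        for name, values in latencies.items():
            values.append(float(row[name]))

# Report the median and an approximate 99th percentile per algorithm.
for name, values in latencies.items():
    p50 = statistics.median(values)
    p99 = statistics.quantiles(values, n=100)[98]
    print(f'{name}: p50={p50:.2f} ms  p99={p99:.2f} ms')
```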
measure.py
import argparse
import csv
import logging
import random

import requests


HOST = '127.0.0.1'
PORT = '5000'
PATHS = [
    'binary',
    'hash',
    'list'
]


def measure_distribution(word_list):
    """Query all three endpoints for each word, yielding latencies in ms."""
    req_count = 0
    for word in word_list:
        word_results = {}
        # Hit the three endpoints in a random order to avoid ordering bias.
        for path in random.sample(PATHS, 3):
            full_path = f'http://{HOST}:{PORT}/{path}/{word}'
            req_count += 1
            response = requests.get(full_path)
            # response.elapsed measures the time from sending the request
            # until the response headers arrive.
            word_results[path] = response.elapsed.total_seconds() * 1000

        if req_count and req_count % 1000 == 0:
            logging.info('Ran %s requests', req_count)

        yield word_results


def save_distribution(word_list, fname):
    """Write one CSV row per word, with a latency column per algorithm."""
    with open(fname, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=PATHS)
        writer.writeheader()

        for sample_row in measure_distribution(word_list):
            writer.writerow(sample_row)


def read_words(fname):
    """Load one word per line, keeping only the part before any '/'."""
    result = []
    logging.info('Loading directory from %s', fname)
    with open(fname, 'r') as f:
        for word in f:
            word = word.split('/')[0]
            word = word.strip()
            result.append(word)

    random.shuffle(result)
    return result


parser = argparse.ArgumentParser(description='Client to gather latency data')
parser.add_argument('--input', help='File with list of words',
                    default='zdict_corto.dic')
parser.add_argument('--output', help='CSV file to write latency data',
                    default='out.csv')


def run(args):
    logging.getLogger().setLevel('INFO')
    word_list = read_words(args.input)
    logging.info('Saving distribution over %d words', len(word_list))
    save_distribution(word_list, args.output)


if __name__ == '__main__':
    args = parser.parse_args()
    run(args)
search.py
import argparse
import json
import logging

from flask import Flask


parser = argparse.ArgumentParser(description='Server to demonstrate latency')
parser.add_argument('--dictionary', help='File with list of words',
                    default='zdict_largo.dic')


# The same word list is held twice: as a sorted list (for binary and
# linear search) and as a set (for hash lookup).
LIST_DIRECTORY = []
SET_DIRECTORY = set()


def _binary_search(directory, start, end, word):
    """Recursive binary search over directory[start:end] (end exclusive)."""
    spot = (start + end) // 2
    if directory[spot] == word:
        return True
    elif spot == start:
        return False
    elif directory[spot] > word:
        return _binary_search(directory, start, spot, word)
    else:
        assert directory[spot] < word
        return _binary_search(directory, spot, end, word)
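
# Illustrative alternative, not part of the original gist: the same
# membership test using the standard library's bisect module
# (binary_contains is a hypothetical helper name).
import bisect

def binary_contains(directory, word):
    # bisect_left returns the leftmost index where word could be
    # inserted while keeping directory sorted; the word is present
    # iff that slot exists and already holds it.
    i = bisect.bisect_left(directory, word)
    return i < len(directory) and directory[i] == word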


def _list_search(directory: list, word):
    # Linear scan: O(n) membership test.
    return word in directory


def _hash_search(directory: set, word):
    # Hash lookup: O(1) average-case membership test.
    return word in directory


def _load_dicts(fname):
    logging.info('Loading directory from %s', fname)
    with open(fname, 'r') as f:
        for word in f:
            # Keep only the word itself, dropping anything after '/'.
            word = word.split('/')[0]
            word = word.strip()
            LIST_DIRECTORY.append(word)
            SET_DIRECTORY.add(word)

    # Binary search requires the list to be sorted.
    LIST_DIRECTORY.sort()
    logging.info('Directory has %d/%d words. Sample:\n%s',
                 len(LIST_DIRECTORY), len(SET_DIRECTORY), LIST_DIRECTORY[0:10])


app = Flask(__name__)


@app.route("/binary/<word>")
def serve_binary(word):
    return json.dumps({
        'word': word,
        'found': _binary_search(LIST_DIRECTORY, 0, len(LIST_DIRECTORY), word)})


@app.route("/list/<word>")
def serve_list(word):
    return json.dumps({
        'word': word,
        'found': _list_search(LIST_DIRECTORY, word)})


@app.route("/hash/<word>")
def serve_hash(word):
    return json.dumps({
        'word': word,
        'found': _hash_search(SET_DIRECTORY, word)})


if __name__ == '__main__':
    logging.getLogger().setLevel('WARNING')
    logging.getLogger('werkzeug').setLevel('ERROR')
    args = parser.parse_args()
    _load_dicts(args.dictionary)
    app.run()
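
Once the server is running, the three endpoints can also be checked by hand. A minimal sketch using `requests` (the word `hola` is an arbitrary example):

```
import requests

# Query each lookup endpoint for the same word and print the JSON
# body along with the client-observed latency in milliseconds.
for path in ('binary', 'hash', 'list'):
    response = requests.get(f'http://127.0.0.1:5000/{path}/hola')
    print(path, response.json(), response.elapsed.total_seconds() * 1000)
```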

zdict_corto.dic: 49,569 lines (not shown; too large to display)
zdict_largo.dic: 466,550 lines (not shown; too large to display)