myociss · August 15, 2025 17:30 · Aug 15, 2025
diff --git a/load_dat.py b/load_dat.py
@@ -0,0 +1,30 @@
+import numpy as np
+from typing import List, Dict, Tuple
+import gzip
+import datetime
+
+# Alias for a 2-D float64 array; the shape parameter is a tuple of ints (rows, columns).
+array_type = np.ndarray[tuple[int, int], np.dtype[np.float64]]
+
+def load_dat(gz_file_path: str, min_stations: int=7, max_chi_squared: float=1.0, max_altitude: float=20e3) -> Tuple[array_type, datetime.datetime]:
+    """Load a gzipped LMA ``.dat`` file and return its quality-filtered sources.
+
+    The header (everything before the ``*** data ***`` marker) is searched for
+    a ``Data start time:`` entry; every non-blank line after the marker is
+    parsed as one source with at least seven whitespace-separated fields.
+
+    Args:
+        gz_file_path: Path to the gzip-compressed text file.
+        min_stations: Minimum number of set bits required in the mask
+            (field 6) for a source to be kept.
+        max_chi_squared: Largest accepted value of field 4 (inclusive).
+        max_altitude: Upper bound on field 3 (exclusive).
+
+    Returns:
+        ``(data, start_time)``. ``data`` is an ``(n, 7)`` float64 array with
+        fields 0-5 as read and, in column 6, the number of set bits of the
+        mask. ``start_time`` is the header timestamp parsed with
+        ``%m/%d/%y %H:%M:%S``.
+
+    Raises:
+        ValueError: If the start-time entry or the data marker is missing, or
+            a field cannot be parsed as a number.
+        IndexError: If a non-blank data line has fewer than seven fields.
+    """
+    with gzip.open(gz_file_path, 'rt') as f:
+        lines = f.readlines()
+
+    # Walk the header only: remember the (last) start-time entry and stop at
+    # the data marker so the numeric rows are never scanned for header text.
+    start_time_str = None
+    data_start = None
+    for line_idx, line in enumerate(lines):
+        if "Data start time:" in line:
+            start_time_str = line.replace("Data start time:", "").strip()
+        elif line.strip() == "*** data ***":
+            # Compare stripped text so trailing whitespace or a missing final
+            # newline does not hide the marker.
+            data_start = line_idx + 1
+            break
+
+    if start_time_str is None:
+        raise ValueError(f"no 'Data start time:' entry found in {gz_file_path!r}")
+    if data_start is None:
+        raise ValueError(f"no '*** data ***' marker found in {gz_file_path!r}")
+    start_time = datetime.datetime.strptime(start_time_str, "%m/%d/%y %H:%M:%S")
+
+    rows = []
+    for line in lines[data_start:]:
+        fields = line.split()
+        if not fields:
+            # Tolerate blank lines (e.g. a trailing empty line).
+            continue
+        row = [float(value) for value in fields[:6]]
+        # Base 0 lets int() honour a 0x/0o/0b prefix on the mask; its
+        # popcount is the number of contributing stations.
+        row.append(float(int(fields[6], 0).bit_count()))
+        rows.append(row)
+    # reshape keeps the (0, 7) shape when the data section is empty.
+    data = np.array(rows, dtype=np.float64).reshape(-1, 7)
+
+    # from https://github.com/deeplycloudy/lmatools/blob/8d55e11dfbbe040f58f9a393f83e33e2a4b84b4c/examples/flashsort/clustertests/lma.py#L144
+    # Column 3 is taken to be altitude, per the usual LMA column order
+    # (time, lat, lon, alt, chi^2, power, mask) -- confirm against the file's
+    # own "Data:" header line.
+    keep = (
+        (data[:, 6] >= min_stations)
+        & (data[:, 4] <= max_chi_squared)
+        & (data[:, 3] < max_altitude)
+    )
+    return data[keep], start_time
No results found