Skip to content

Instantly share code, notes, and snippets.

@myociss
Created August 15, 2025 17:30
Show Gist options
  • Select an option

  • Save myociss/ffc537daf95d55ad3d1be7d25b8e00a3 to your computer and use it in GitHub Desktop.

Select an option

Save myociss/ffc537daf95d55ad3d1be7d25b8e00a3 to your computer and use it in GitHub Desktop.

Revisions

  1. myociss created this gist Aug 15, 2025.
    30 changes: 30 additions & 0 deletions load_dat.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,30 @@
    import numpy as np
    from typing import List, Dict, Tuple
    import gzip
    import datetime

    array_type = np.ndarray[tuple[float], np.dtype[np.float64]]

    def load_dat(gz_file_path: str, min_stations: int=7, max_chi_squared: float=1.0, max_altitude: float=20e3) -> Tuple[array_type, datetime.datetime]:

    with gzip.open(gz_file_path, 'rt') as f:
    lines = f.readlines()

    start_time_str = ""
    for l in lines:
    if "Data start time:" in l:
    start_time_str = l.replace("Data start time:", "").strip()

    start_time = datetime.datetime.strptime(start_time_str, "%m/%d/%y %H:%M:%S")
    idx = 1 + lines.index("*** data ***\n")
    lines = lines[idx:]
    data = np.zeros((len(lines), 7))

    for line_idx, l in enumerate(lines):
    splt = l.strip().split()
    for j in range(6):
    data[line_idx,j] = float(splt[j])
    data[line_idx,6] = float(int(splt[6], 0).bit_count())

    # from https://github.com/deeplycloudy/lmatools/blob/8d55e11dfbbe040f58f9a393f83e33e2a4b84b4c/examples/flashsort/clustertests/lma.py#L144
    return data[(data[:,6] >= min_stations) & (data[:,4] <= max_chi_squared) & (data[:,4] < max_altitude)], start_time