(defn take-first-sorted-by "Performs the same result as (take n (sort-by keyfn comp coll)) but more efficient in terms of time and memory" ([n keyfn coll] (take-first-sorted-by n keyfn compare coll)) ([n keyfn ^java.util.Comparator comp coll] (if (pos? n) (let [m ^java.util.TreeMap (java.util.TreeMap. comp) m-size (volatile! 0) ;; if it is guaranteed that (keyfn v) will be unique for all v in coll ;; we can attach :unique? key to keyfn meta and algorythm will use single val map ;; instead of multi-map and will be faster & more optimal ;; But if we add :unique? meta and there will be any duplicates of (keyfn v) ;; result may be wrong unique-keyfn? (:unique? (meta keyfn)) ;; _ (prn :unique-keyfn? unique-keyfn?) m-add (if unique-keyfn? (fn [k v] (.put m k v)) (fn [k v] (if-let [a ^java.util.ArrayList (.get m k)] (.add a v) (.put m k (doto (java.util.ArrayList.) (.add v)))))) m-sub (if unique-keyfn? (fn [k] (.remove m k)) (fn [k] (let [a ^java.util.ArrayList (.get m k) last-index ^int (.intValue (dec (.size a)))] ;; last-index have to be int type! (.remove a ^int last-index) ;; place for happy debugging ))) (when (.isEmpty a) (.remove m k)))))] (doseq [v coll] (let [k (keyfn v)] (if (< @m-size n) (do (m-add k v) (vswap! m-size inc)) (let [max-key (.lastKey m)] (when (neg? (comp k max-key)) (m-sub max-key) (m-add k v)))))) (if unique-keyfn? (vals m) (->> m vals (apply concat)))) (list)))) (deftest take-first-sorted-by-test (testing "take-first-sorted-by = (take n (sort-by ...))" (doseq [n [0 1 10 30 150] keyfn [identity #(rem % 10) #(rem % 15) #(rem % 53)]] (let [coll (range 100) comp #(compare %2 %1)] (is (= (take n (sort-by keyfn coll)) (utils/take-first-sorted-by n keyfn coll))) (is (= (take n (sort-by keyfn comp coll)) (utils/take-first-sorted-by n keyfn comp coll)))))) (testing "take-first-sorted-by with :unique? keyfn meta" (doseq [n [0 1 10 30 150] unique? [true false]] (let [coll (map (fn [_] (let [x (str (d/squuid))] [(rem (hash x) 100) x])) (range 100)) keyfn (with-meta second {:unique? unique?}) comp #(compare %2 %1)] (is (= (take n (sort-by keyfn coll)) (utils/take-first-sorted-by n keyfn coll))) (is (= (take n (sort-by keyfn comp coll)) (utils/take-first-sorted-by n keyfn comp coll)))))))