# -*- nim -*- # # $Id: ef48349b0e2f28a0c8b7f076e84e875a6a86f3a4 $ # # hash table wrapper with sorter # # Copyright (c) 2017 Jordan Hrycaj # All rights reserved. # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # # The author or authors of this code dedicate any and all copyright interest # in this code to the public domain. We make this dedication for the benefit # of the public at large and to the detriment of our heirs and successors. # We intend this dedication to be an overt act of relinquishment in # perpetuity of all present and future rights to this code under copyright # law. # # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. # ## Hash table extension with on-the-fly sorter for uint64 keys. ## ## This module works as a TableRef (see the table module) unless order ## relations are needed, for instance: ## ## .. code-block:: ## import hlst ## ## var h = newHLst64[string](nil,5) # nil: return as void value ## ## h[2] = "two" ## h[4] = "four" ## h[5] = "five" ## ## doAssert h[2] == "two" ## doAssert h[6].isNil # nil was initialised as void value ## ## If order relations come into play, the underlying TableRef will ## be ad-hoc extended by a sorter index. This enables simple queries ## ge(), lt(), etc. Example: ## ## .. code-block:: ## doAssert h.ge(8).isNil ## doAssert h.gt(0) == "two" ## ## Once activated, the sorter index will stay in the background to be ## readily available. For repeated add/delete operations, the sorter ## inxed sould be flushed as it slows down these operations. This might ## not be a problem for sporadic add/delete use after order relation ## usage. ## ## To flush the sorter index use unSort() as in the example: ## ## .. code-block:: ## h.unSort() ## ## or combined with other operations ## ## .. code-block:: ## h.unSort().del(5) ## ## Although the ad-hoc nature of the sorter index does not perform ## uniformly, with C projects I observed better performance with the ## on-the-fly hash table sorters than with red-black trees under the ## following conditions: ## ## * collect data first, rarely use order relations at this stage ## * pretty print sorted collected data when finished ## import algorithm, math, sequtils, tables const minTabInit = 8 kVoid = (not 0u64) type HLst64*[T] = ref HLst64Obj[T] ## table object reference HLst64Obj[T] = object keys: seq[uint64] kDirty: bool # seq[] unsorted, may not exist or contain high(K) entries kStart: int # start of valid keys in seq[], >0 => kDirty tab: TableRef[uint64,T] ifFail: T KeyInxFn = proc(sorted: seq[uint64]; key: uint64): int # ---------------------------------------------------------------------------- # Private methods # ---------------------------------------------------------------------------- proc inxLe(q: seq[uint64]; key: uint64): int {.inline.} = result = q.lowerBound(key, cmp) if q.len <= result or key < q[result]: result.dec proc inxGe[uint64](q: seq[uint64]; key: uint64): int {.inline.} = result = q.lowerBound(key, cmp) if q.len <= result: result = -1 proc tabKeyList[T](st: HLst64[T]): seq[uint64] {.inline.} = result = newSeq[uint64](st.tab.len) var n = 0 for k in st.tab.keys: result[n] = k.uint64 n.inc proc sorter[T](st: HLst64[T]): HLst64[T] {.discardable.} = if st.kDirty: if st.keys.isNil: st.keys = st.tabKeyList st.keys.sort(cmp) if st.keys[st.keys.len-1] == kVoid: var lastValid = st.keys.inxLe(kVoid-1) if 0 <= lastValid: st.keys.setlen(lastValid+1) st.kStart = 0 st.kDirty = false result = st proc getKey[T](st: Hlst64[T]; inx: KeyInxFn; key: uint64): uint64 = if 0 < st.tab.len and key != kVoid: var n = st.sorter().keys.inx(key) if 0 <= n: return st.keys[n] result = kVoid proc key2Val[T](st: Hlst64[T]; key: uint64): T = if key != kVoid: st.tab[key] else: st.ifFail proc haveStartSlot[T](st: Hlst64[T]): bool {.inline.} = # can use start key for comparing 1st item (not st.kDirty and 0 < st.keys.len) or 0 < st.kStart # => not st.keys.isNil proc unqFirstSlot[T](st: Hlst64[T]): uint64 {.discardable.} = assert st.haveStartSlot() result = st.keys[st.kStart] st.kDirty = true # update sorter list if st.kStart + 1 == st.keys.len: # sorter list becomes empty? st.keys = nil st.kStart = 0 else: st.keys[st.kStart] = kVoid # set index void st.kStart.inc proc disSlot[T](st: Hlst64[T]; n: int): uint64 {.discardable.} = assert 0 <= n and n < st.keys.len result = st.keys[n] if st.keys.len == 1: # last item st.keys = nil # no sorter list needed st.kDirty = true elif n + 1 == st.keys.len: # on last slot => chop sorter list st.keys.setLen(n) # may stay clean else: st.keys[n] = kVoid # de-activate slot st.kDirty = true # ---------------------------------------------------------------------------- # Public methods # ---------------------------------------------------------------------------- proc newHlst64*[T](ifFail: T; tabInit = 64): Hlst64[T] = ## new hashed tab with on-the-fly sorter, the ifFail argument is returned ## as a valie when table lookup fails (see also HLst64VoidKey()) new result result.kDirty = true # start with: st.keys.isNil result.tab = newTable[uint64,T](nextPowerOfTwo(min(tabInit,minTabInit))) result.ifFail = ifFail proc unSort*[T](st: Hlst64[T]): Hlst64[T] {.discardable.} = ## remove sorter and free resources, ## useful if order relations are not needed anymore result = st st.keys = nil st.kDirty = true st.kStart = 0 proc sort*[T](st: Hlst64[T]): Hlst64[T] {.discardable,inline.} = ## explicitely force sorter, not needed under mormal circumstances st.sorter() proc len*[T](st: Hlst64[T]): int = ## returns the number of keys in the list st.tab.len proc clear*[T](st: Hlst64[T]): Hlst64[T] {.discardable.} = ## resets the list so that it becomes empty st.unSort.tab.clear proc HLstVoidKey*[T](st: Hlst64[T]): uint64 {.inline.} = ## pseudo constant, void key kVoid proc HLstMaxKey*[T](st: Hlst64[T]): uint64 {.inline.} = ## pseudo constant, one less than void key kVoid - 1 proc HLstVoidVal*[T](st: Hlst64[T]): T {.inline.} = ## pseudo constant, void value st.ifFail proc `[]=`*[T](st: Hlst64[T]; key: uint64; data: T) = ## add or update a value; the key argument must be smaller than high(K) ## which is -- in the case of int keys -- the C equivalent of INTMAX assert key < kVoid assert data != st.ifFail if not st.keys.isNil and not st.tab.hasKey(key): st.keys.add(key) st.kDirty = true st.tab[key] = data proc del*[T](st: Hlst64[T]; key: uint64): Hlst64[T] {.discardable.} = ## delete item in table with key == argument key result = st if st.tab.hasKey(key): st.tab.del(key) # delete from table if st.keys.isNil: # is there a sorter list, at all? return # nothing to do if st.haveStartSlot() and key == st.keys[st.kStart]: st.unqFirstSlot() # some easy case (avoids sorting) return var slot = st.sorter().keys.binarySearch(key) if 0 <= slot: st.disSlot(slot) proc hasKey*[T](st: Hlst64[T]; key: uint64): bool = ## Returns true key if it exists st.tab.hasKey(key) proc eqKey*[T](st: Hlst64[T]; key: uint64): uint64 = ## Returns argument key if it exists, HLstVoidKey otherwise if st.tab.hasKey(key): key else: kVoid proc leKey*[T](st: Hlst64[T]; key: uint64): uint64 = ## find maximum key less or equal argument (or HLatVoidKey) proc leFn(q: seq[uint64]; key: uint64): int = inxLe(q, key) st.getKey(leFn, key) proc geKey*[K,T](st: Hlst64[T]; key: uint64): uint64 = ## find minimum key greater or equal argument (or HLatVoidKey) if st.haveStartSlot() and key <= st.keys[st.kStart]: st.keys[st.kStart] else: proc geFn(q: seq[uint64]; key: uint64): int = inxGe(q, key) st.getKey(geFn, key) proc ltKey*[T](st:Hlst64[T]; key: uint64): uint64 {.inline.} = ## ltKey(st,key-1) is an alias for leKey(st,key-1) leKey[T](st,key-1) proc gtKey*[T](st:Hlst64[T]; key: uint64): uint64 {.inline.} = ## gtKey(st,key-1) is an alias for geKey(st,key+1) geKey[T](st,key+1) proc minKey*[T](st:HLst64[T]): uint64 {.inline.} = ## retrieve mimimum key if st.haveStartSlot(): st.keys[st.kStart] elif 0 < st.tab.len: st.sorter().keys[0] else: kVoid proc unqueue*[T](st: Hlst64[T]): T = ## retrieve/fetch item with mimimum key (or HLstVoidVal) if not st.haveStartSlot(): if st.tab.len == 0: return st.ifFail st.sorter() var key = st.unqFirstSlot() result = st.tab[key] # return value from table st.tab.del(key) # remove table entry proc pop*[T](st: Hlst64[T]): T = ## retrieve/fetch item with maximum key (or HLstVoidVal) if st.kDirty: if st.tab.len == 0: return st.ifFail st.sorter() var key = st.disSlot(st.keys.len-1) result = st.tab[key] st.tab.del(key) proc eq*[T](st: Hlst64[T]; key: uint64): T = ## find value in table with key == argument, ## returns HLstVoidVal if there is no such key if st.tab.hasKey(key): st.tab[key] else: st.ifFail proc `[]`*[T](st:Hlst64[T]; key: uint64): T {.inline.} = ## st[key] is an alias for eq(st,key) eq[T](st,key) proc le*[T](st: Hlst64[T]; key: uint64): T = ## find value in table where key is maximum <= argument, ## returns HLstVoidVal if there is no such key proc leFn(q: seq[uint64]; key: uint64): int = inxLe(q, key) st.key2val(st.getKey(leFn, key)) proc ge*[T](st: Hlst64[T]; key: uint64): T = ## find value in table where key is minimum >= argument, ## returns HLstVoidVal if there is no such key if st.haveStartSlot() and key <= st.keys[st.kStart]: st.tab[st.keys[st.kStart]] else: proc geFn(q: seq[uint64]; key: uint64): int = inxGe(q, key) st.key2val(st.getKey(geFn, key)) proc lt*[T](st:Hlst64[T]; key: uint64): T {.inline.} = ## lt(st,key) is an alias for le(st,key-1) le[T](st, key - 1) proc gt*[T](st:Hlst64[T]; key: uint64): T {.inline.} = ## gt(st,key) is an alias for ge(st,key+1) ge[T](st, key + 1) iterator pairs*[T](st: Hlst64[T]): (uint64, T) {.inline.} = ## iterates over any (key,value) pair in the table st for k,v in st.tab.pairs: yield (k.uint64,v) iterator mpairs*[T](st: Hlst64[T]): (uint64, var T) {.inline.} = ## iterates over any (key,value) pair in the table st, ## the values can be modified for k,v in st.tab.mpairs: yield (k.uint64,v) iterator keys*[T](st: Hlst64[T]): uint64 {.inline.} = ## iterates over any key in the table st for k in st.tab.keys: yield k.uint64 iterator values*[T](st: Hlst64[T]): T {.inline.} = ## iterates over any value in the table st for v in st.tab.values: yield v iterator mvalues*[T](st: Hlst64[T]): var T {.inline.} = ## iterates over any value in the table st, ## the values can be modified for v in st.tab.mvalues: yield v iterator sortedKeys*[T](st: Hlst64[T]): uint64 {.inline.} = ## like keys() but over sorted table if st.tab.len != 0: if not st.haveStartSlot(): st.sorter() for n in st.kStart..>> ", h.repr # when not defined(check_run): # echo "*** No tests, yet" # ---------------------------------------------------------------------------- # End # ----------------------------------------------------------------------------