# -*- nim -*- # # $Id: 4e762ba679d9afef81a000625b956ab86254e316 $ # # hash table wrapper with sorter # # Copyright (c) 2017 Jordan Hrycaj # All rights reserved. # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # # The author or authors of this code dedicate any and all copyright interest # in this code to the public domain. We make this dedication for the benefit # of the public at large and to the detriment of our heirs and successors. # We intend this dedication to be an overt act of relinquishment in # perpetuity of all present and future rights to this code under copyright # law. # # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. # ## Hash table extension with on-the-fly sorter for signed integer keys (see ## Hlst64 for uint64 keys). ## ## This module works as a TableRef (see the table module) unless order ## relations are needed, for instance: ## ## .. code-block:: ## import hlst ## ## var h = newHLst[int,string](nil,5) # nil: return as void value ## ## h[2] = "two" ## h[4] = "four" ## h[5] = "five" ## ## doAssert h[2] == "two" ## doAssert h[6].isNil # nil was initialised as void value ## ## If order relations come into play, the underlying TableRef will ## be ad-hoc extended by a sorter index. This enables simple queries ## ge(), lt(), etc. Example: ## ## .. code-block:: ## doAssert h.ge(8).isNil ## doAssert h.gt(0) == "two" ## ## Once activated, the sorter index will stay in the background to be ## readily available. For repeated add/delete operations, the sorter ## inxed sould be flushed as it slows down these operations. This might ## not be a problem for sporadic add/delete use after order relation ## usage. ## ## To flush the sorter index use unSort() as in the example: ## ## .. code-block:: ## h.unSort() ## ## or combined with other operations ## ## .. code-block:: ## h.unSort().del(5) ## ## Although the ad-hoc nature of the sorter index does not perform ## uniformly, with C projects I observed better performance with the ## on-the-fly hash table sorters than with red-black trees under the ## following conditions: ## ## * collect data first, rarely use order relations at this stage ## * pretty print sorted collected data when finished ## import algorithm, math, sequtils, tables const minTabInit = 8 type HLst*[K,T] = ref HLstObj[K,T] ## table object reference HLstObj[K,T] = object keys: seq[K] kDirty: bool # seq[] unsorted, may not exist or contain high(K) entries kStart: int # start of valid keys in seq[], >0 => kDirty tab: TableRef[K,T] ifFail: T KeyInxFn[K] = proc(sorted: seq[K]; key: K): int # ---------------------------------------------------------------------------- # Private methods # ---------------------------------------------------------------------------- proc inxLe[K](q: seq[K]; key: K): int {.inline.} = result = q.lowerBound(key, cmp) if q.len <= result or key < q[result]: result.dec proc inxGe[K](q: seq[K]; key: K): int {.inline.} = result = q.lowerBound(key, cmp) if q.len <= result: result = -1 proc sorter[K,T](st: HLst[K,T]): HLst[K,T] {.discardable.} = if st.kDirty: if st.keys.isNil: st.keys = toSeq(st.tab.keys()) st.keys.sort(cmp) if st.keys[st.keys.len-1] == high(K).K: var lastValid = st.keys.inxLe(high(K).K-1) if 0 <= lastValid: st.keys.setlen(lastValid+1) st.kStart = 0 st.kDirty = false result = st proc getKey[K,T](st: HLst[K,T]; inx: KeyInxFn[K]; key: K): K = if 0 < st.tab.len and key != high(K).K: var n = st.sorter().keys.inx(key) if 0 <= n: return st.keys[n] result = high(K).K proc key2Val[K,T](st: HLst[K,T]; key: K): T = if key != high(K).K: st.tab[key] else: st.ifFail proc haveStartSlot[K,T](st: HLst[K,T]): bool {.inline.} = # can use start key for comparing 1st item (not st.kDirty and 0 < st.keys.len) or 0 < st.kStart # => not st.keys.isNil proc unqFirstSlot[K,T](st: HLst[K,T]): K {.discardable.} = assert st.haveStartSlot() result = st.keys[st.kStart] st.kDirty = true # update sorter list if st.kStart + 1 == st.keys.len: # sorter list becomes empty? st.keys = nil st.kStart = 0 else: st.keys[st.kStart] = high(K).K # set index void st.kStart.inc proc disSlot[K,T](st: HLst[K,T]; n: int): K {.discardable.} = assert 0 <= n and n < st.keys.len result = st.keys[n] if st.keys.len == 1: # last item st.keys = nil # no sorter list needed st.kDirty = true elif n + 1 == st.keys.len: # on last slot => chop sorter list st.keys.setLen(n) # may stay clean else: st.keys[n] = high(K).K # de-activate slot st.kDirty = true # ---------------------------------------------------------------------------- # Public methods # ---------------------------------------------------------------------------- proc newHlst*[K,T](ifFail: T; tabInit = 64): HLst[K,T] = ## new hashed tab with on-the-fly sorter, the ifFail argument is returned ## as a valie when table lookup fails (see also HLstVoidKey()) new result result.kDirty = true # start with: st.keys.isNil result.tab = newTable[K,T](nextPowerOfTwo(min(tabInit,minTabInit))) result.ifFail = ifFail proc unSort*[K,T](st: HLst[K,T]): HLst[K,T] {.discardable.} = ## remove sorter and free resources, ## useful if order relations are not needed anymore result = st st.keys = nil st.kDirty = true st.kStart = 0 proc sort*[K,T](st: HLst[K,T]): HLst[K,T] {.discardable,inline.} = ## explicitely force sorter, not needed under mormal circumstances st.sorter() proc len*[K,T](st: HLst[K,T]): int = ## returns the number of keys in the list st.tab.len proc clear*[K,T](st: HLst[K,T]): HLst[K,T] {.discardable.} = ## resets the list so that it becomes empty st.unSort.tab.clear proc HLstVoidKey*[K,T](st: HLst[K,T]): K {.inline.} = ## pseudo constant, void key high(K).K proc HLstMaxKey*[K,T](st: HLst[K,T]): K {.inline.} = ## pseudo constant, one less than void key high(K).K - 1 proc HLstVoidVal*[K,T](st: HLst[K,T]): T {.inline.} = ## pseudo constant, void value st.ifFail proc `[]=`*[K,T](st: HLst[K,T]; key: K; data: T) = ## add or update a value; the key argument must be smaller than high(K) ## which is -- in the case of int keys -- the C equivalent of INTMAX assert key < high(K).K assert data != st.ifFail if not st.keys.isNil and not st.tab.hasKey(key): st.keys.add(key) st.kDirty = true st.tab[key] = data proc del*[K,T](st: HLst[K,T]; key: K): HLst[K,T] {.discardable.} = ## delete item in table with key == argument key result = st if st.tab.hasKey(key): st.tab.del(key) # delete from table if st.keys.isNil: # is there a sorter list, at all? return # nothing to do if st.haveStartSlot() and key == st.keys[st.kStart]: st.unqFirstSlot() # some easy case (avoids sorting) return var slot = st.sorter().keys.binarySearch(key) if 0 <= slot: st.disSlot(slot) proc hasKey*[K,T](st: HLst[K,T]; key: K): bool = ## Returns true key if it exists st.tab.hasKey(key) proc eqKey*[K,T](st: HLst[K,T]; key: K): K = ## Returns argument key if it exists, HLstVoidKey otherwise if st.tab.hasKey(key): key else: high(K).K proc leKey*[K,T](st: HLst[K,T]; key: K): K = ## find maximum key less or equal argument (or HLatVoidKey) proc leFn(q: seq[K]; key: K): int = inxLe[K](q, key) st.getKey(leFn, key) proc geKey*[K,T](st: HLst[K,T]; key: K): K = ## find minimum key greater or equal argument (or HLatVoidKey) if st.haveStartSlot() and key <= st.keys[st.kStart]: st.keys[st.kStart] else: proc geFn(q: seq[K]; key: K): int = inxGe[K](q, key) st.getKey(geFn, key) proc ltKey*[K,T](st:HLst[K,T]; key:K): K {.inline.} = ## ltKey(st,key-1) is an alias for leKey(st,key-1) leKey[K,T](st,key-1) proc gtKey*[K,T](st:HLst[K,T]; key:K): K {.inline.} = ## gtKey(st,key-1) is an alias for geKey(st,key+1) geKey[K,T](st,key+1) proc minKey*[K,T](st:HLst[K,T]): K {.inline.} = ## retrieve mimimum key if st.haveStartSlot(): st.keys[st.kStart] elif 0 < st.tab.len: st.sorter().keys[0] else: high(K).K proc unqueue*[K,T](st: HLst[K,T]): T = ## retrieve/fetch item with mimimum key (or HLstVoidVal) if not st.haveStartSlot(): if st.tab.len == 0: return st.ifFail st.sorter() var key = st.unqFirstSlot() result = st.tab[key] # return value from table st.tab.del(key) # remove table entry proc pop*[K,T](st: HLst[K,T]): T = ## retrieve/fetch item with maximum key (or HLstVoidVal) if st.kDirty: if st.tab.len == 0: return st.ifFail st.sorter() var key = st.disSlot(st.keys.len-1) result = st.tab[key] st.tab.del(key) proc eq*[K,T](st: HLst[K,T]; key: K): T = ## find value in table with key == argument, ## returns HLstVoidVal if there is no such key if st.tab.hasKey(key): st.tab[key] else: st.ifFail proc `[]`*[K,T](st:HLst[K,T]; key:K): T {.inline.} = ## st[key] is an alias for eq(st,key) eq[K,T](st,key) proc le*[K,T](st: HLst[K,T]; key: K): T = ## find value in table where key is maximum <= argument, ## returns HLstVoidVal if there is no such key proc leFn(q: seq[K]; key: K): int = inxLe[K](q, key) st.key2val(st.getKey(leFn, key)) proc ge*[K,T](st: HLst[K,T]; key: K): T = ## find value in table where key is minimum >= argument, ## returns HLstVoidVal if there is no such key if st.haveStartSlot() and key <= st.keys[st.kStart]: st.tab[st.keys[st.kStart]] else: proc geFn(q: seq[K]; key: K): int = inxGe[K](q, key) st.key2val(st.getKey(geFn, key)) proc lt*[K,T](st:HLst[K,T]; key:K): T {.inline.} = ## lt(st,key) is an alias for le(st,key-1) le[K,T](st,key-1) proc gt*[K,T](st:HLst[K,T]; key:K): T {.inline.} = ## gt(st,key) is an alias for ge(st,key+1) ge[K,T](st,key+1) iterator pairs*[K,T](st: HLst[K,T]): (K, T) {.inline.} = ## iterates over any (key,value) pair in the table st for k,v in st.tab.pairs: yield (k,v) iterator mpairs*[K,T](st: HLst[K,T]): (K, var T) {.inline.} = ## iterates over any (key,value) pair in the table st, ## the values can be modified for k,v in st.tab.mpairs: yield (k,v) iterator keys*[K,T](st: HLst[K,T]): K {.inline.} = ## iterates over any key in the table st for k in st.tab.keys: yield k iterator values*[K,T](st: HLst[K,T]): T {.inline.} = ## iterates over any value in the table st for v in st.tab.values: yield v iterator mvalues*[K,T](st: HLst[K,T]): var T {.inline.} = ## iterates over any value in the table st, ## the values can be modified for v in st.tab.mvalues: yield v iterator sortedKeys*[K,T](st: HLst[K,T]): K {.inline.} = ## like keys() but over sorted table if st.tab.len != 0: if not st.haveStartSlot(): st.sorter() for n in st.kStart..>> ", h.repr # when not defined(check_run): # echo "*** No tests, yet" # ---------------------------------------------------------------------------- # End # ----------------------------------------------------------------------------