# -*- nim -*-
#
# $Id: ef48349b0e2f28a0c8b7f076e84e875a6a86f3a4 $
#
# hash table wrapper with sorter
#
# Copyright (c) 2017 Jordan Hrycaj <jordan@teddy-net.com>
# All rights reserved.
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# The author or authors of this code dedicate any and all copyright interest
# in this code to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and successors.
# We intend this dedication to be an overt act of relinquishment in
# perpetuity of all present and future rights to this code under copyright
# law.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

## Hash table extension with on-the-fly sorter for uint64 keys.
##
## This module works as a TableRef (see the table module) unless order
## relations are needed, for instance:
##
## .. code-block::
##   import hlst
##
##   var h = newHLst64[string](nil,5)     # nil: return as void value
##
##   h[2] = "two"
##   h[4] = "four"
##   h[5] = "five"
##
##   doAssert h[2] == "two"
##   doAssert h[6].isNil                  # nil was initialised as void value
##
## If order relations come into play, the underlying TableRef will
## be ad-hoc extended by a sorter index. This enables simple queries
## ge(), lt(), etc. Example:
##
## .. code-block::
##   doAssert h.ge(8).isNil
##   doAssert h.gt(0) == "two"
##
## Once activated, the sorter index will stay in the background to be
## readily available. For repeated add/delete operations, the sorter
## inxed sould be flushed as it slows down these operations. This might
## not be a problem for sporadic add/delete use after order relation
## usage.
##
## To flush the sorter index use unSort() as in the example:
##
## .. code-block::
##   h.unSort()
##
## or combined with other operations
##
## .. code-block::
##   h.unSort().del(5)
##
## Although the ad-hoc nature of the sorter index does not perform
## uniformly, with C projects I observed better performance with the
## on-the-fly hash table sorters than with red-black trees under the
## following conditions:
##
##  * collect data first, rarely use order relations at this stage
##  * pretty print sorted collected data when finished
##

import
  algorithm, math, sequtils, tables

const
  minTabInit = 8
  kVoid = (not 0u64)

type
  HLst64*[T] = ref HLst64Obj[T] ## table object reference
  HLst64Obj[T] = object
    keys:   seq[uint64]
    kDirty: bool    # seq[] unsorted, may not exist or contain high(K) entries
    kStart: int     # start of valid keys in seq[], >0 => kDirty
    tab:    TableRef[uint64,T]
    ifFail: T

  KeyInxFn = proc(sorted: seq[uint64]; key: uint64): int

# ----------------------------------------------------------------------------
#  Private methods
# ----------------------------------------------------------------------------

proc inxLe(q: seq[uint64]; key: uint64): int {.inline.} =
  result = q.lowerBound(key, cmp)
  if q.len <= result or key < q[result]:
    result.dec

proc inxGe[uint64](q: seq[uint64]; key: uint64): int {.inline.} =
  result = q.lowerBound(key, cmp)
  if q.len <= result:
    result = -1

proc tabKeyList[T](st: HLst64[T]): seq[uint64] {.inline.} =
  result = newSeq[uint64](st.tab.len)
  var n = 0
  for k in st.tab.keys:
    result[n] = k.uint64
    n.inc

proc sorter[T](st: HLst64[T]): HLst64[T] {.discardable.}  =
  if st.kDirty:
    if st.keys.isNil:
      st.keys = st.tabKeyList
    st.keys.sort(cmp)
    if st.keys[st.keys.len-1] == kVoid:
      var lastValid = st.keys.inxLe(kVoid-1)
      if 0 <= lastValid:
        st.keys.setlen(lastValid+1)
    st.kStart = 0
    st.kDirty = false
  result = st

proc getKey[T](st: Hlst64[T]; inx: KeyInxFn; key: uint64): uint64 =
  if 0 < st.tab.len and key != kVoid:
    var n = st.sorter().keys.inx(key)
    if 0 <= n:
      return st.keys[n]
  result = kVoid

proc key2Val[T](st: Hlst64[T]; key: uint64): T =
  if key != kVoid:
    st.tab[key]
  else:
    st.ifFail

proc haveStartSlot[T](st: Hlst64[T]): bool {.inline.} =
  # can use start key for comparing 1st item
  (not st.kDirty and 0 < st.keys.len) or 0 < st.kStart # => not st.keys.isNil

proc unqFirstSlot[T](st: Hlst64[T]): uint64 {.discardable.} =
  assert st.haveStartSlot()
  result = st.keys[st.kStart]
  st.kDirty = true                     # update sorter list
  if st.kStart + 1 == st.keys.len:     # sorter list becomes empty?
    st.keys = nil
    st.kStart = 0
  else:
    st.keys[st.kStart] = kVoid         # set index void
    st.kStart.inc

proc disSlot[T](st: Hlst64[T]; n: int): uint64 {.discardable.} =
  assert 0 <= n and n < st.keys.len
  result = st.keys[n]
  if st.keys.len == 1:                 # last item
    st.keys = nil                      # no sorter list needed
    st.kDirty = true
  elif n + 1 == st.keys.len:           # on last slot => chop sorter list
    st.keys.setLen(n)                  # may stay clean
  else:
    st.keys[n] = kVoid                 # de-activate slot
    st.kDirty = true

# ----------------------------------------------------------------------------
#  Public methods
# ----------------------------------------------------------------------------

proc newHlst64*[T](ifFail: T; tabInit = 64): Hlst64[T] =
  ## new hashed tab with on-the-fly sorter, the ifFail argument is returned
  ## as a valie when table lookup fails (see also HLst64VoidKey())
  new result
  result.kDirty = true                   # start with: st.keys.isNil
  result.tab = newTable[uint64,T](nextPowerOfTwo(min(tabInit,minTabInit)))
  result.ifFail = ifFail

proc unSort*[T](st: Hlst64[T]): Hlst64[T] {.discardable.} =
  ## remove sorter and free resources,
  ## useful if order relations are not needed anymore
  result = st
  st.keys = nil
  st.kDirty = true
  st.kStart = 0

proc sort*[T](st: Hlst64[T]): Hlst64[T] {.discardable,inline.} =
  ## explicitely force sorter, not needed under mormal circumstances
  st.sorter()

proc len*[T](st: Hlst64[T]): int =
  ## returns the number of keys in the list
  st.tab.len

proc clear*[T](st: Hlst64[T]): Hlst64[T] {.discardable.} =
  ## resets the list so that it becomes empty
  st.unSort.tab.clear

proc HLstVoidKey*[T](st: Hlst64[T]): uint64 {.inline.} =
  ## pseudo constant, void key
  kVoid

proc HLstMaxKey*[T](st: Hlst64[T]): uint64 {.inline.} =
  ## pseudo constant, one less than void key
  kVoid - 1

proc HLstVoidVal*[T](st: Hlst64[T]): T {.inline.} =
  ## pseudo constant, void value
  st.ifFail

proc `[]=`*[T](st: Hlst64[T]; key: uint64; data: T) =
  ## add or update a value; the key argument must be smaller than high(K)
  ## which is -- in the case of int keys -- the C equivalent of INTMAX
  assert key < kVoid
  assert data != st.ifFail
  if not st.keys.isNil and not st.tab.hasKey(key):
    st.keys.add(key)
    st.kDirty = true
  st.tab[key] = data

proc del*[T](st: Hlst64[T]; key: uint64): Hlst64[T] {.discardable.} =
  ## delete item in table with key == argument key
  result = st
  if st.tab.hasKey(key):
    st.tab.del(key)                      # delete from table
    if st.keys.isNil:                    # is there a sorter list, at all?
      return                             # nothing to do

    if st.haveStartSlot() and key == st.keys[st.kStart]:
      st.unqFirstSlot()                  # some easy case (avoids sorting)
      return

    var slot = st.sorter().keys.binarySearch(key)
    if 0 <= slot:
      st.disSlot(slot)

proc hasKey*[T](st: Hlst64[T]; key: uint64): bool =
  ## Returns true key if it exists
  st.tab.hasKey(key)

proc eqKey*[T](st: Hlst64[T]; key: uint64): uint64 =
  ## Returns argument key if it exists, HLstVoidKey otherwise
  if st.tab.hasKey(key):
    key
  else:
    kVoid

proc leKey*[T](st: Hlst64[T]; key: uint64): uint64 =
  ## find maximum key less or equal argument (or HLatVoidKey)
  proc leFn(q: seq[uint64]; key: uint64): int = inxLe(q, key)
  st.getKey(leFn, key)

proc geKey*[K,T](st: Hlst64[T]; key: uint64): uint64 =
  ## find minimum key greater or equal argument (or HLatVoidKey)
  if st.haveStartSlot() and key <= st.keys[st.kStart]:
    st.keys[st.kStart]
  else:
    proc geFn(q: seq[uint64]; key: uint64): int = inxGe(q, key)
    st.getKey(geFn, key)

proc ltKey*[T](st:Hlst64[T]; key: uint64): uint64 {.inline.} =
  ## ltKey(st,key-1) is an alias for leKey(st,key-1)
  leKey[T](st,key-1)

proc gtKey*[T](st:Hlst64[T]; key: uint64): uint64 {.inline.} =
  ## gtKey(st,key-1) is an alias for geKey(st,key+1)
  geKey[T](st,key+1)

proc minKey*[T](st:HLst64[T]): uint64 {.inline.} =
  ## retrieve mimimum key
  if st.haveStartSlot():
    st.keys[st.kStart]
  elif 0 < st.tab.len:
    st.sorter().keys[0]
  else:
    kVoid

proc unqueue*[T](st: Hlst64[T]): T =
  ## retrieve/fetch item with mimimum key (or HLstVoidVal)
  if not st.haveStartSlot():
    if st.tab.len == 0:
      return st.ifFail
    st.sorter()
  var key = st.unqFirstSlot()
  result = st.tab[key]                        # return value from table
  st.tab.del(key)                             # remove table entry

proc pop*[T](st: Hlst64[T]): T =
  ## retrieve/fetch item with maximum key (or HLstVoidVal)
  if st.kDirty:
    if st.tab.len == 0:
      return st.ifFail
    st.sorter()
  var key = st.disSlot(st.keys.len-1)
  result = st.tab[key]
  st.tab.del(key)

proc eq*[T](st: Hlst64[T]; key: uint64): T =
  ## find value in table with key == argument,
  ## returns HLstVoidVal if there is no such key
  if st.tab.hasKey(key):
    st.tab[key]
  else:
    st.ifFail

proc `[]`*[T](st:Hlst64[T]; key: uint64): T {.inline.} =
  ## st[key] is an alias for eq(st,key)
  eq[T](st,key)

proc le*[T](st: Hlst64[T]; key: uint64): T =
  ## find value in table where key is maximum <= argument,
  ## returns HLstVoidVal if there is no such key
  proc leFn(q: seq[uint64]; key: uint64): int = inxLe(q, key)
  st.key2val(st.getKey(leFn, key))

proc ge*[T](st: Hlst64[T]; key: uint64): T =
  ## find value in table where key is minimum >= argument,
  ## returns HLstVoidVal if there is no such key
  if st.haveStartSlot() and key <= st.keys[st.kStart]:
    st.tab[st.keys[st.kStart]]
  else:
    proc geFn(q: seq[uint64]; key: uint64): int = inxGe(q, key)
    st.key2val(st.getKey(geFn, key))

proc lt*[T](st:Hlst64[T]; key: uint64): T {.inline.} =
  ## lt(st,key) is an alias for le(st,key-1)
  le[T](st, key - 1)

proc gt*[T](st:Hlst64[T]; key: uint64): T {.inline.} =
  ## gt(st,key) is an alias for ge(st,key+1)
  ge[T](st, key + 1)

iterator pairs*[T](st: Hlst64[T]): (uint64, T)  {.inline.} =
  ## iterates over any (key,value) pair in the table st
  for k,v in st.tab.pairs:
    yield (k.uint64,v)

iterator mpairs*[T](st: Hlst64[T]): (uint64, var T) {.inline.} =
  ## iterates over any (key,value) pair in the table st,
  ## the values can be modified
  for k,v in st.tab.mpairs:
    yield (k.uint64,v)

iterator keys*[T](st: Hlst64[T]): uint64  {.inline.} =
  ## iterates over any key in the table st
  for k in st.tab.keys:
    yield k.uint64

iterator values*[T](st: Hlst64[T]): T {.inline.} =
  ## iterates over any value in the table st
  for v in st.tab.values:
    yield v

iterator mvalues*[T](st: Hlst64[T]): var T {.inline.} =
  ## iterates over any value in the table st,
  ## the values can be modified
  for v in st.tab.mvalues:
    yield v

iterator sortedKeys*[T](st: Hlst64[T]): uint64  {.inline.} =
  ## like keys() but over sorted table
  if st.tab.len != 0:
    if not st.haveStartSlot():
      st.sorter()
    for n in st.kStart..<st.keys.len:
      yield st.keys[n]

iterator sortedPairs*[T](st: Hlst64[T]): (uint64, T)  {.inline.} =
  ## like pairs() but over sorted table
  for k in st.sortedKeys:
    yield (k,st.tab[k])

# ----------------------------------------------------------------------------
# Tests
# ----------------------------------------------------------------------------

when isMainModule:

  var h = newHLst64[string](nil,5)

  h[2] = "two"
  h[4] = "four"
  h[5] = "five"

  doAssert h.le(8) == "five"
  doAssert h.le(4) == "four"
  doAssert h.le(3) == "two"
  doAssert h.le(1).isNil

  doAssert h.lt(8) == "five"
  doAssert h.lt(4) == "two"
  doAssert h.lt(3) == "two"
  doAssert h.lt(2).isNil
  doAssert h.lt(1).isNil

  doAssert h.ltKey(8) == 5
  doAssert h.ltKey(4) == 2
  doAssert h.ltKey(3) == 2
  doAssert h.ltKey(2) == h.HLstVoidKey
  doAssert h.ltKey(1) == h.HLstVoidKey

  doAssert h.del(4).le(4) == "two"
  doAssert h[4].isNil

  h[7] = "seven"

  doAssert h[6].isNil
  doAssert h.ge(6) == "seven"
  doAssert h.ge(7) == "seven"
  doAssert h.ge(8).isNil

  doAssert h.gt(0) == "two"
  doAssert h.gt(5) == "seven"
  doAssert h.gt(7).isNil
  doAssert h.gt(8).isNil

  doAssert h.pop() == "seven"
  doAssert h.pop() == "five"
  doAssert h.pop() == "two"
  doAssert h.pop().isNil

  h[4] = "four"
  h[5] = "five"
  h[2] = "two"

  doAssert h.unSort.unqueue() == "two"
  doAssert h.unSort.unqueue() == "four"
  doAssert h.unSort.unqueue() == "five"
  doAssert h.unSort.unqueue().isNil

#  echo ">>> ", h.repr
#  when not defined(check_run):
#    echo "*** No tests, yet"

# ----------------------------------------------------------------------------
# End
# ----------------------------------------------------------------------------