# -*- nim -*-
#
# $Id: 4e762ba679d9afef81a000625b956ab86254e316 $
#
# hash table wrapper with sorter
#
# Copyright (c) 2017 Jordan Hrycaj <jordan@teddy-net.com>
# All rights reserved.
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# The author or authors of this code dedicate any and all copyright interest
# in this code to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and successors.
# We intend this dedication to be an overt act of relinquishment in
# perpetuity of all present and future rights to this code under copyright
# law.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

## Hash table extension with on-the-fly sorter for signed integer keys (see
## Hlst64 for uint64 keys).
##
## This module works as a TableRef (see the table module) unless order
## relations are needed, for instance:
##
## .. code-block::
##   import hlst
##
##   var h = newHLst[int,string](nil,5)   # nil: return as void value
##
##   h[2] = "two"
##   h[4] = "four"
##   h[5] = "five"
##
##   doAssert h[2] == "two"
##   doAssert h[6].isNil                  # nil was initialised as void value
##
## If order relations come into play, the underlying TableRef will
## be ad-hoc extended by a sorter index. This enables simple queries
## ge(), lt(), etc. Example:
##
## .. code-block::
##   doAssert h.ge(8).isNil
##   doAssert h.gt(0) == "two"
##
## Once activated, the sorter index will stay in the background to be
## readily available. For repeated add/delete operations, the sorter
## inxed sould be flushed as it slows down these operations. This might
## not be a problem for sporadic add/delete use after order relation
## usage.
##
## To flush the sorter index use unSort() as in the example:
##
## .. code-block::
##   h.unSort()
##
## or combined with other operations
##
## .. code-block::
##   h.unSort().del(5)
##
## Although the ad-hoc nature of the sorter index does not perform
## uniformly, with C projects I observed better performance with the
## on-the-fly hash table sorters than with red-black trees under the
## following conditions:
##
##  * collect data first, rarely use order relations at this stage
##  * pretty print sorted collected data when finished
##

import
  algorithm, math, sequtils, tables

const
  minTabInit = 8

type
  HLst*[K,T] = ref HLstObj[K,T] ## table object reference
  HLstObj[K,T] = object
    keys:   seq[K]
    kDirty: bool    # seq[] unsorted, may not exist or contain high(K) entries
    kStart: int     # start of valid keys in seq[], >0 => kDirty
    tab:    TableRef[K,T]
    ifFail: T

  KeyInxFn[K] = proc(sorted: seq[K]; key: K): int

# ----------------------------------------------------------------------------
#  Private methods
# ----------------------------------------------------------------------------

proc inxLe[K](q: seq[K]; key: K): int {.inline.} =
  result = q.lowerBound(key, cmp)
  if q.len <= result or key < q[result]:
    result.dec

proc inxGe[K](q: seq[K]; key: K): int {.inline.} =
  result = q.lowerBound(key, cmp)
  if q.len <= result:
    result = -1

proc sorter[K,T](st: HLst[K,T]): HLst[K,T] {.discardable.}  =
  if st.kDirty:
    if st.keys.isNil:
      st.keys = toSeq(st.tab.keys())
    st.keys.sort(cmp)
    if st.keys[st.keys.len-1] == high(K).K:
      var lastValid = st.keys.inxLe(high(K).K-1)
      if 0 <= lastValid:
        st.keys.setlen(lastValid+1)
    st.kStart = 0
    st.kDirty = false
  result = st

proc getKey[K,T](st: HLst[K,T]; inx: KeyInxFn[K]; key: K): K =
  if 0 < st.tab.len and key != high(K).K:
    var n = st.sorter().keys.inx(key)
    if 0 <= n:
      return st.keys[n]
  result = high(K).K

proc key2Val[K,T](st: HLst[K,T]; key: K): T =
  if key != high(K).K:
    st.tab[key]
  else:
    st.ifFail
  
proc haveStartSlot[K,T](st: HLst[K,T]): bool {.inline.} =
  # can use start key for comparing 1st item
  (not st.kDirty and 0 < st.keys.len) or 0 < st.kStart # => not st.keys.isNil

proc unqFirstSlot[K,T](st: HLst[K,T]): K {.discardable.} =
  assert st.haveStartSlot()
  result = st.keys[st.kStart]
  st.kDirty = true                     # update sorter list
  if st.kStart + 1 == st.keys.len:     # sorter list becomes empty?
    st.keys = nil
    st.kStart = 0
  else:
    st.keys[st.kStart] = high(K).K     # set index void
    st.kStart.inc

proc disSlot[K,T](st: HLst[K,T]; n: int): K {.discardable.} =
  assert 0 <= n and n < st.keys.len
  result = st.keys[n]
  if st.keys.len == 1:                 # last item
    st.keys = nil                      # no sorter list needed
    st.kDirty = true
  elif n + 1 == st.keys.len:           # on last slot => chop sorter list
    st.keys.setLen(n)                  # may stay clean
  else:
    st.keys[n] = high(K).K             # de-activate slot
    st.kDirty = true

# ----------------------------------------------------------------------------
#  Public methods
# ----------------------------------------------------------------------------

proc newHlst*[K,T](ifFail: T; tabInit = 64): HLst[K,T] =
  ## new hashed tab with on-the-fly sorter, the ifFail argument is returned
  ## as a valie when table lookup fails (see also HLstVoidKey())
  new result
  result.kDirty = true                   # start with: st.keys.isNil
  result.tab = newTable[K,T](nextPowerOfTwo(min(tabInit,minTabInit)))
  result.ifFail = ifFail

proc unSort*[K,T](st: HLst[K,T]): HLst[K,T] {.discardable.} =
  ## remove sorter and free resources,
  ## useful if order relations are not needed anymore
  result = st
  st.keys = nil
  st.kDirty = true
  st.kStart = 0

proc sort*[K,T](st: HLst[K,T]): HLst[K,T] {.discardable,inline.} =
  ## explicitely force sorter, not needed under mormal circumstances
  st.sorter()

proc len*[K,T](st: HLst[K,T]): int =
  ## returns the number of keys in the list
  st.tab.len

proc clear*[K,T](st: HLst[K,T]): HLst[K,T] {.discardable.} =
  ## resets the list so that it becomes empty
  st.unSort.tab.clear

proc HLstVoidKey*[K,T](st: HLst[K,T]): K {.inline.} =
  ## pseudo constant, void key
  high(K).K

proc HLstMaxKey*[K,T](st: HLst[K,T]): K {.inline.} =
  ## pseudo constant, one less than void key
  high(K).K - 1

proc HLstVoidVal*[K,T](st: HLst[K,T]): T {.inline.} =
  ## pseudo constant, void value
  st.ifFail

proc `[]=`*[K,T](st: HLst[K,T]; key: K; data: T) =
  ## add or update a value; the key argument must be smaller than high(K)
  ## which is -- in the case of int keys -- the C equivalent of INTMAX
  assert key < high(K).K
  assert data != st.ifFail
  if not st.keys.isNil and not st.tab.hasKey(key):
    st.keys.add(key)
    st.kDirty = true
  st.tab[key] = data

proc del*[K,T](st: HLst[K,T]; key: K): HLst[K,T] {.discardable.} =
  ## delete item in table with key == argument key
  result = st
  if st.tab.hasKey(key):
    st.tab.del(key)                      # delete from table
    if st.keys.isNil:                    # is there a sorter list, at all?
      return                             # nothing to do

    if st.haveStartSlot() and key == st.keys[st.kStart]:
      st.unqFirstSlot()                  # some easy case (avoids sorting)
      return

    var slot = st.sorter().keys.binarySearch(key)
    if 0 <= slot:
      st.disSlot(slot)

proc hasKey*[K,T](st: HLst[K,T]; key: K): bool =
  ## Returns true key if it exists
  st.tab.hasKey(key)

proc eqKey*[K,T](st: HLst[K,T]; key: K): K =
  ## Returns argument key if it exists, HLstVoidKey otherwise
  if st.tab.hasKey(key):
    key
  else:
    high(K).K

proc leKey*[K,T](st: HLst[K,T]; key: K): K =
  ## find maximum key less or equal argument (or HLatVoidKey)
  proc leFn(q: seq[K]; key: K): int = inxLe[K](q, key)
  st.getKey(leFn, key)

proc geKey*[K,T](st: HLst[K,T]; key: K): K =
  ## find minimum key greater or equal argument (or HLatVoidKey)
  if st.haveStartSlot() and key <= st.keys[st.kStart]:
    st.keys[st.kStart]
  else:
    proc geFn(q: seq[K]; key: K): int = inxGe[K](q, key)
    st.getKey(geFn, key)

proc ltKey*[K,T](st:HLst[K,T]; key:K): K {.inline.} =
  ## ltKey(st,key-1) is an alias for leKey(st,key-1)
  leKey[K,T](st,key-1)

proc gtKey*[K,T](st:HLst[K,T]; key:K): K {.inline.} =
  ## gtKey(st,key-1) is an alias for geKey(st,key+1)
  geKey[K,T](st,key+1)

proc minKey*[K,T](st:HLst[K,T]): K {.inline.} =
  ## retrieve mimimum key
  if st.haveStartSlot():
    st.keys[st.kStart]
  elif 0 < st.tab.len:
    st.sorter().keys[0]
  else:
    high(K).K

proc unqueue*[K,T](st: HLst[K,T]): T =
  ## retrieve/fetch item with mimimum key (or HLstVoidVal)
  if not st.haveStartSlot():
    if st.tab.len == 0:
      return st.ifFail
    st.sorter()
  var key = st.unqFirstSlot()
  result = st.tab[key]                        # return value from table
  st.tab.del(key)                             # remove table entry

proc pop*[K,T](st: HLst[K,T]): T =
  ## retrieve/fetch item with maximum key (or HLstVoidVal)
  if st.kDirty:
    if st.tab.len == 0:
      return st.ifFail
    st.sorter()
  var key = st.disSlot(st.keys.len-1)
  result = st.tab[key]
  st.tab.del(key)

proc eq*[K,T](st: HLst[K,T]; key: K): T =
  ## find value in table with key == argument,
  ## returns HLstVoidVal if there is no such key
  if st.tab.hasKey(key):
    st.tab[key]
  else:
    st.ifFail

proc `[]`*[K,T](st:HLst[K,T]; key:K): T {.inline.} =
  ## st[key] is an alias for eq(st,key)
  eq[K,T](st,key)

proc le*[K,T](st: HLst[K,T]; key: K): T =
  ## find value in table where key is maximum <= argument,
  ## returns HLstVoidVal if there is no such key
  proc leFn(q: seq[K]; key: K): int = inxLe[K](q, key)
  st.key2val(st.getKey(leFn, key))

proc ge*[K,T](st: HLst[K,T]; key: K): T =
  ## find value in table where key is minimum >= argument,
  ## returns HLstVoidVal if there is no such key
  if st.haveStartSlot() and key <= st.keys[st.kStart]:
    st.tab[st.keys[st.kStart]]
  else:
    proc geFn(q: seq[K]; key: K): int = inxGe[K](q, key)
    st.key2val(st.getKey(geFn, key))

proc lt*[K,T](st:HLst[K,T]; key:K): T {.inline.} =
  ## lt(st,key) is an alias for le(st,key-1)
  le[K,T](st,key-1)

proc gt*[K,T](st:HLst[K,T]; key:K): T {.inline.} =
  ## gt(st,key) is an alias for ge(st,key+1)
  ge[K,T](st,key+1)

iterator pairs*[K,T](st: HLst[K,T]): (K, T)  {.inline.} =
  ## iterates over any (key,value) pair in the table st
  for k,v in st.tab.pairs:
    yield (k,v)

iterator mpairs*[K,T](st: HLst[K,T]): (K, var T) {.inline.} =
  ## iterates over any (key,value) pair in the table st,
  ## the values can be modified
  for k,v in st.tab.mpairs:
    yield (k,v)

iterator keys*[K,T](st: HLst[K,T]): K  {.inline.} =
  ## iterates over any key in the table st
  for k in st.tab.keys:
    yield k

iterator values*[K,T](st: HLst[K,T]): T {.inline.} =
  ## iterates over any value in the table st
  for v in st.tab.values:
    yield v

iterator mvalues*[K,T](st: HLst[K,T]): var T {.inline.} =
  ## iterates over any value in the table st,
  ## the values can be modified
  for v in st.tab.mvalues:
    yield v

iterator sortedKeys*[K,T](st: HLst[K,T]): K  {.inline.} =
  ## like keys() but over sorted table
  if st.tab.len != 0:
    if not st.haveStartSlot():
      st.sorter()
    for n in st.kStart..<st.keys.len:
      yield st.keys[n]

iterator sortedPairs*[K,T](st: HLst[K,T]): (K, T)  {.inline.} =
  ## like pairs() but over sorted table
  for k in st.sortedKeys:
    yield (k,st.tab[k])

# ----------------------------------------------------------------------------
# Tests
# ----------------------------------------------------------------------------

when isMainModule:

  var h = newHLst[int,string](nil,5)

  h[2] = "two"
  h[4] = "four"
  h[5] = "five"

  doAssert h.minKey == 2
  doAssert h.le(8)  == "five"
  doAssert h.le(4)  == "four"
  doAssert h.le(3)  == "two"
  doAssert h.le(1).isNil

  doAssert h.lt(8) == "five"
  doAssert h.lt(4) == "two"
  doAssert h.lt(3) == "two"
  doAssert h.lt(2).isNil
  doAssert h.lt(1).isNil

  doAssert h.ltKey(8) == 5
  doAssert h.ltKey(4) == 2
  doAssert h.ltKey(3) == 2
  doAssert h.ltKey(2) == h.HLstVoidKey
  doAssert h.ltKey(1) == h.HLstVoidKey

  doAssert h.del(4).le(4) == "two"
  doAssert h[4].isNil

  h[7] = "seven"

  doAssert h[6].isNil
  doAssert h.ge(6) == "seven"
  doAssert h.ge(7) == "seven"
  doAssert h.ge(8).isNil

  doAssert h.gt(0) == "two"
  doAssert h.gt(5) == "seven"
  doAssert h.gt(7).isNil
  doAssert h.gt(8).isNil

  doAssert h.pop == "seven"
  doAssert h.pop == "five"
  doAssert h.pop == "two"
  doAssert h.pop.isNil

  h[4] = "four"
  h[5] = "five"
  h[2] = "two"

  doAssert h.minKey         == 2
  doAssert h.unSort.unqueue == "two"
  doAssert h.unSort.unqueue == "four"
  doAssert h.unSort.unqueue == "five"
  doAssert h.unSort.unqueue.isNil

#  echo ">>> ", h.repr

#  when not defined(check_run):
#    echo "*** No tests, yet"

# ----------------------------------------------------------------------------
# End
# ----------------------------------------------------------------------------