
package dbmodel

/** Version 11.1
 *
 * A number of aggregate functions are often needed in queries
 *  on collections. Some are provided below for illustration:
 *
 *  [[biggest]], [[smallest]], 
 *  [[count]], [[sum]], [[average]], 
 *  [[maximize]], [[minimize]]
 *
 *  Also, some combinators [[combine]] are provided for combining
 *  aggregate functions so that multiple aggregate functions can
 *  be computed in a single pass through a collection.
 *
 *  Wong Limsoon
 *  13 May 2023
 */


  object OpG: 


    import scala.language.implicitConversions
    import dbmodel.Synchronizable.CBI
    import dbmodel.OrderedCollection.OColl
    import dbmodel.Predicates.DBNumeric

    type Bool = Boolean       



    /** [[Aggr(e, iter, done)]] represents an aggregate function
     *  equivalent to [[done o foldLeft(e, iter)]]. This "trampoline"
     *  form exposes the key components defining the aggregate function.
     *  This makes it possible to later apply combinators to combine
     *  multiple aggregate functions in a step-wise manner.
     */

    case class Aggr[B,C,+D](e: C, iter: (B,C) => C, done: C => D):

      /** Run this aggregate over every element of `bs`.
       *
       *  The state starts at `e`, is threaded through `iter` once per
       *  element (element first, state second), and the final state is
       *  handed to `done`. With no elements the result is `done(e)`.
       */
      def of(bs: IterableOnce[B]): D =
        val finalState = bs.iterator.foldLeft(e) { (state, b) => iter(b, state) }
        done(finalState)

      /** Function-call syntax for [[of]]. */
      def apply(bs: IterableOnce[B]): D = of(bs)

      /** Partition a collection based on its key, and apply this
       *  aggregate function to each partition separately.
       *
       *  The work is delegated to `bi.clusteredFold`, which receives this
       *  aggregate's `e`, `iter` and `done` together with `output`.
       *  NOTE(review): `output` presumably turns each (key, result) pair
       *  into an element of the returned `CBI` -- confirm against `OColl`.
       */
      def byPartition[K,E](bi: OColl[B,K], output: (K,D) => E): CBI[E] =
        bi.clusteredFold(e, iter, done, output)

    object Aggr:
      /** Expose the three components of an aggregate as a tuple. */
      def unapply[B,C,D](a: Aggr[B,C,D]): Some[(C, (B,C) => C, C => D)] =
        Some((a.e, a.iter, a.done))

    end Aggr



    /** In [[Aggr[B,C,D]]] above, [[C]] is the type of the intermediate
     *  data in computing an aggregate function. Users mainly want to
     *  know the input type [[B]] and output type [[D]]. So, we use a
     *  simple typecasting trick below to hide this [[C]].
     *
     *  Instead of constructing [[Aggr(e, iter, done)]], construct the
     *  aggregate function using [[AGGR(e, iter, done)]] which 
     *  typecasts away the intermediate type [[C]].
     */

    /** An aggregate seen only through its input type `B` and result
     *  type `D`; the intermediate state type is fixed to `Any`.
     */
    type AGGR[B,+D] = Aggr[B,Any,D]

    /** Build an aggregate from its seed `e`, step function `iter` and
     *  finishing function `done`, then forget the state type `C`.
     *
     *  The cast is unchecked: the returned value still holds the
     *  original `C`-typed functions.
     */
    def AGGR[B,C,D](e: C, iter: (B,C) => C, done: C => D): AGGR[B,D] =
      val typed = new Aggr[B,C,D](e, iter, done)
      typed.asInstanceOf[AGGR[B,D]]


    /** Combinators to combine aggregate functions.
     */

    /** Fuse two aggregates over the same input into one that runs both
     *  in a single pass.
     *
     *  The combined state is the pair of both states; each element is
     *  fed to `f.iter` and `g.iter`, and at the end `h` merges the two
     *  finished results.
     *
     *  @param f first aggregate
     *  @param g second aggregate
     *  @param h merges the result of `f` with the result of `g`
     */
    def combine[B,D1,D2,D](f: AGGR[B,D1], g: AGGR[B,D2])(h: (D1,D2) => D): AGGR[B,D] =
      val (stepF, stepG) = (f.iter, g.iter)
      val (finishF, finishG) = (f.done, g.done)
      val step = (b: B, state: (Any,Any)) => (stepF(b, state._1), stepG(b, state._2))
      val finish = (state: (Any,Any)) => h(finishF(state._1), finishG(state._2))
      AGGR[B,(Any,Any),D]((f.e, g.e), step, finish)

    /** Fuse any number of named aggregates over the same input into one
     *  aggregate whose result is a map from each name to that
     *  aggregate's result.
     *
     *  The combined state is a map from name to the per-aggregate state.
     *  The lookup tables are built with `toMap`, so when a name occurs
     *  more than once only the last aggregate given for it is kept.
     *
     *  @param aggrs (name, aggregate) pairs to run side by side
     */
    def combine[B,D](aggrs: (String, AGGR[B,D])*): AGGR[B,Map[String,D]] =
      val seeds     = aggrs.view.map((name, aggr) => name -> aggr.e).toMap
      val steps     = aggrs.view.map((name, aggr) => name -> aggr.iter).toMap
      val finishers = aggrs.view.map((name, aggr) => name -> aggr.done).toMap
      // Advance every per-name state by one element.
      def advance(b: B, states: Map[String,Any]): Map[String,Any] =
        states.map((name, state) => name -> steps(name)(b, state))
      // Turn every per-name state into its final result.
      def finish(states: Map[String,Any]): Map[String,D] =
        states.map((name, state) => name -> finishers(name)(state))
      AGGR[B,Map[String,Any],Map[String,D]](seeds, advance, finish)



    /** Example aggregate functions
     */

    /** Aggregate yielding the smallest value of `f` over the input.
     *
     *  The fold is seeded with the `MaxValue` of the `DBNumeric[C]`
     *  instance and narrowed with its `min`; that seed is returned
     *  unchanged when no element is folded in.
     *
     *  @param f extracts the value to compare from each element
     */
    def SMALLEST[B,C: DBNumeric](f: B => C): AGGR[B,C] =
      val num = summon[DBNumeric[C]]
      def keepLower(b: B, lowest: C): C = num.min(f(b), lowest)
      AGGR[B,C,C](num.MaxValue, keepLower, identity[C])
    
 
    /** Aggregate yielding the biggest value of `f` over the input.
     *
     *  The fold is seeded with the `MinValue` of the `DBNumeric[C]`
     *  instance and widened with its `max`; that seed is returned
     *  unchanged when no element is folded in.
     *
     *  @param f extracts the value to compare from each element
     */
    def BIGGEST[B,C: DBNumeric](f: B => C): AGGR[B,C] =
      val num = summon[DBNumeric[C]]
      def keepHigher(b: B, highest: C): C = num.max(f(b), highest)
      AGGR[B,C,C](num.MinValue, keepHigher, identity[C])


    /** Aggregate counting the elements of the input: starts at 0 and
     *  adds 1 per element, ignoring the element itself.
     */
    def COUNT[B]: AGGR[B,Int] =
      val bump = (_: B, seen: Int) => seen + 1
      AGGR[B,Int,Int](0, bump, n => n)


    /** Aggregate adding up the values of `f` over the input.
     *
     *  The running total starts at `fromInt(0)` of the `DBNumeric[C]`
     *  instance and grows via its `plus`.
     *
     *  @param f extracts the value to add from each element
     */
    def SUM[B, C: DBNumeric](f: B => C): AGGR[B,C] =
      val num = summon[DBNumeric[C]]
      def add(b: B, total: C): C = num.plus(f(b), total)
      AGGR[B,C,C](num.fromInt(0), add, identity[C])


    /** Aggregate multiplying together the values of `f` over the input.
     *
     *  The running product starts at `fromInt(1)` of the `DBNumeric[C]`
     *  instance and grows via its `times`.
     *
     *  @param f extracts the factor from each element
     */
    def PROD[B, C:DBNumeric](f: B => C): AGGR[B,C] =
      val num = summon[DBNumeric[C]]
      def multiply(b: B, product: C): C = num.times(f(b), product)
      AGGR[B,C,C](num.fromInt(1), multiply, identity[C])


    /** Aggregate yielding the arithmetic mean of `f` over the input.
     *
     *  Count and sum are computed together in one pass via [[combine]];
     *  an input with no elements yields `0.0`.
     *
     *  @param f extracts the value to average from each element
     */
    def AVERAGE[B](f: B => Double): AGGR[B,Double] =
      combine(COUNT[B], SUM[B,Double](f)) { (n, total) =>
        if n == 0 then 0.0 else total / n.toDouble
      }


    /** Aggregate yielding the median of `f` over the input.
     *
     *  The median is picked by position: the fold keeps the trailing half
     *  of the values seen so far, and the result is the middle value (odd
     *  count) or the mean of the two middle values (even count). The
     *  input must therefore already be ordered by `f` for the result to
     *  be the statistical median.
     *
     *  The state is an immutable queue, so the seed is never modified and
     *  the same aggregate can safely be applied any number of times.
     *
     *  @param f extracts the value from each element
     *  @throws NoSuchElementException when applied to an input with no
     *          elements
     */
    def MEDIAN[B](f: B => Double): AGGR[B,Double] =
      import scala.collection.immutable.Queue
      // (true when an even number of elements has been seen, trailing half)
      val e = (true, Queue.empty[Double])
      def iter(b: B, a: (Bool, Queue[Double])) =
        val (even, window) = a
        // Going from an even to an odd count moves the middle one step
        // to the right, so the oldest retained value is no longer needed.
        val trimmed = if even && window.nonEmpty then window.tail else window
        (!even, trimmed.enqueue(f(b)))
      def done(a: (Bool, Queue[Double])) =
        val (even, window) = a
        if window.isEmpty then
          throw new NoSuchElementException("MEDIAN of an empty collection")
        else if even then (window.head + window.tail.head) / 2
        else window.head
      AGGR[B,(Bool,Queue[Double]),Double](e, iter, done)


    /** Aggregate yielding the minimum of `f` together with every element
     *  that attains it, in the order the elements were encountered.
     *
     *  The minimum is seeded with the `MaxValue` of the `DBNumeric[C]`
     *  instance, so an input with no elements yields that seed and an
     *  empty list.
     *
     *  @param f the quantity to minimize
     */
    def MINIMIZE[B,C: DBNumeric](f: B => C): AGGR[B,(C,List[B])] =
      val numeric = summon[DBNumeric[C]]
      val ord = numeric.ord
      val e = (numeric.MaxValue, List[B]())
      val iter = (b: B, a: (C, List[B])) =>
        val cur = f(b)
        val (min, acc) = a
        // Branch on the sign only: a comparison is free to return any
        // negative or positive number, not just -1 or 1.
        val cmp = ord.compare(min, cur)
        if cmp < 0 then a                       // current minimum still wins
        else if cmp == 0 then (min, b :: acc)   // tie: remember this element too
        else (cur, List(b))                     // new minimum: start over
      // Elements were prepended, so restore encounter order.
      val done = (a: (C, List[B])) => (a._1, a._2.reverse)
      AGGR[B,(C,List[B]),(C,List[B])](e, iter, done)


    /** Aggregate yielding the maximum of `f` together with every element
     *  that attains it, in the order the elements were encountered.
     *
     *  The maximum is seeded with the `MinValue` of the `DBNumeric[C]`
     *  instance, so an input with no elements yields that seed and an
     *  empty list.
     *
     *  @param f the quantity to maximize
     */
    def MAXIMIZE[B,C: DBNumeric](f: B => C): AGGR[B,(C,List[B])] =
      val numeric = summon[DBNumeric[C]]
      val ord = numeric.ord
      val e   = (numeric.MinValue, List[B]())
      val iter = (b: B, a: (C, List[B])) =>
        val cur = f(b)
        val (max, acc) = a
        // Branch on the sign only: a comparison is free to return any
        // negative or positive number, not just -1 or 1.
        val cmp = ord.compare(cur, max)
        if cmp < 0 then a                       // current maximum still wins
        else if cmp == 0 then (max, b :: acc)   // tie: remember this element too
        else (cur, List(b))                     // new maximum: start over
      // Elements were prepended, so restore encounter order.
      val done = (a: (C, List[B])) => (a._1, a._2.reverse)
      AGGR[B,(C,List[B]),(C,List[B])](e, iter, done)


    /** Aggregate yielding summary statistics of `f` in a single pass.
     *
     *  The result map has the keys "count", "sum", "sumsq", "min", "max",
     *  "average" and "variance". "average" is 0 for an input with no
     *  elements. "variance" divides by `count - 1` and is 0 when there
     *  are fewer than two elements. "min" and "max" come from
     *  [[SMALLEST]] and [[BIGGEST]].
     *
     *  @param f extracts the value from each element
     */
    def STATS[B](f: B=>Double): AGGR[B,Map[String,Double]] =
      def sq(n: Double) = n * n
      val aggrs = combine[B,Double](
        "count" -> SUM[B,Double](_ => 1.0),
        "sum"   -> SUM[B,Double](f),
        "sumsq" -> SUM[B,Double](b => sq(f(b))),
        "min"   -> SMALLEST[B,Double](f),
        "max"   -> BIGGEST[B,Double](f))
      def welldone(state: Any): Map[String,Double] =
        // Finish the combined aggregate properly instead of casting its
        // intermediate state, then derive the extra figures from it.
        val numbers = aggrs.done(state)
        val cnt = numbers("count")
        val sum = numbers("sum")
        val ssq = numbers("sumsq")
        val ave = if cnt != 0 then sum/cnt else 0.0
        val vre = if cnt > 1 then (ssq + (cnt*ave*ave) - (2*sum*ave))/(cnt-1) else 0.0
        numbers ++ Map("average" -> ave, "variance" -> vre)
      AGGR[B,Any,Map[String,Double]](aggrs.e, aggrs.iter, welldone)


    /** Endow collections with aggregate functions
     */

    extension [B](it: IterableOnce[B])
      /** Smallest value of `f` over the elements; see [[SMALLEST]]. */
      def smallest[C: DBNumeric](f: B => C): C = SMALLEST(f).of(it)

      /** Biggest value of `f` over the elements; see [[BIGGEST]]. */
      def biggest[C: DBNumeric](f: B => C): C = BIGGEST(f).of(it)

      /** Number of elements; see [[COUNT]]. */
      def count: Int = COUNT[B].of(it)

      /** Sum of `f` over the elements; see [[SUM]]. */
      def sum[C: DBNumeric](f: B => C): C = SUM(f).of(it)

      /** Product of `f` over the elements; see [[PROD]]. */
      def prod[C: DBNumeric](f: B => C): C = PROD(f).of(it)

      /** Arithmetic mean of `f` over the elements; see [[AVERAGE]]. */
      def average(f: B => Double): Double = AVERAGE(f).of(it)

      /** Median of `f` over the elements; see [[MEDIAN]]. */
      def median(f: B => Double): Double = MEDIAN(f).of(it)

      /** Minimum of `f` and the elements attaining it; see [[MINIMIZE]]. */
      def minimize[C: DBNumeric](f: B => C): (C, List[B]) = MINIMIZE(f).of(it)

      /** Maximum of `f` and the elements attaining it; see [[MAXIMIZE]]. */
      def maximize[C: DBNumeric](f: B => C): (C, List[B]) = MAXIMIZE(f).of(it)

      /** Summary statistics of `f` over the elements; see [[STATS]]. */
      def stats(f: B => Double): Map[String, Double] = STATS(f).of(it)


  end OpG 






/** Examples ***********************************************

{{{

   import dbmodel.OrderedCollection.{ given, * }
   import dbmodel.OpG.{ given, * }
   import dbmodel.Synchrony.siterator
   import scala.language.implicitConversions



// aa is a sorted table.


   val aa = OSeq(1,2,3,4,5,6,7,7,8,8,9)


// for each a <- aa,
// return items b in aa s.t. abs(b - a) < 3.
// return also a count of such b's and their sum.


   {
     def cs(b: Int, a: Int) = (b - a).abs < 3
     val bi = aa.siterator(aa.key, cs)
     for 
       a <- aa
       bs = bi.syncedWith(a)
     do
     println((a, bs, COUNT of bs, SUM((b: Int) => b) of bs))
   }


// Output should be:


(1,Vector(1, 2, 3),3,6)
(2,Vector(1, 2, 3, 4),4,10)
(3,Vector(1, 2, 3, 4, 5),5,15)
(4,Vector(2, 3, 4, 5, 6),5,20)
(5,Vector(3, 4, 5, 6, 7, 7),6,32)
(6,Vector(4, 5, 6, 7, 7, 8, 8),7,45)
(7,Vector(5, 6, 7, 7, 8, 8, 9),7,50)
(7,Vector(5, 6, 7, 7, 8, 8, 9),7,50)
(8,Vector(6, 7, 7, 8, 8, 9),6,45)
(8,Vector(6, 7, 7, 8, 8, 9),6,45)
(9,Vector(7, 7, 8, 8, 9),5,39)


// End output



}}}

*************************************************************/


