


package dbmodel

  /** Version 11.1 
   *
   *  A simple model to turn files into ordered collection. Ordered
   *  collection comes with Synchrony iterator, which provides efficient
   *  general synchronized iteration over multiple collections.
   *
   *  Wong Limsoon
   *  13 May 2023
   */




  object DBFile:

    import scala.language.implicitConversions
    import java.nio.file.{ Files, Paths, StandardCopyOption }
    import java.nio.charset.StandardCharsets.UTF_8
    import java.util.Base64
    import java.io.{
      BufferedWriter, PrintWriter, File, EOFException, IOException,
      ObjectInputStream, ByteArrayInputStream,
      ByteArrayOutputStream, ObjectOutputStream 
    }

    import scala.collection.BufferedIterator
    import dbmodel.Synchronizable.CBI
    import dbmodel.OrderedCollection.{ OColl, OSeq, Key }


    // Default folder for temporary files. It is the default value of the
    // `folder` parameter of the file-writing helpers in this object.
    var TMP: String = "."         // Default folder for temporary files


    type Ord[K]    = Ordering[K]  // Shorthand

    type Bool      = Boolean      // Shorthand

    /** A [[PARSER]] parses a few lines at a time until an object
     *  can be constructed, and returns that object.
     *  The initial string can be used for specifying some
     *  initialization parameters.
     *  The iterator supplies the remaining lines of the input.
     *  The second (Int) parameter tells the parser which
     *  entry it is now parsing.
     */
    type PARSER[B] = String => (Iterator[String], Int) => B  // Type of parser.

    /** A [[FORMATTER]] converts/formats an object into a string for 
     *  writing to file. 
     *  The initial string can be used for specifying some
     *  initialization parameters.
     *  The second (Int) parameter tells the formatter which entry
     *  it is formatting; e.g., it may need to generate header info
     *  for the 1st entry.
     */
    type FORMATTER[B] = String => (B, Int) => String // Type of a formatter.

    /** A file destructor deletes a given file. Some descendant
     *  types of [[OFile]] may be nested files. Such associated
     *  files may need to be deleted as well.
     */
    type DESTRUCTOR[B] = OFile[B,_] => Unit        // Type of file destructor.



    /** [[FileIterator(filename, parser)]] uses the [[parser]] to parse
     *  the file [[filename]] into an iterator on items in the file.
     *
     *  @param filename  is name of the file.
     *  @param parser is a parser for the file.
     */

    case class FileIterator[B](filename: String, parser: PARSER[B])
      extends CBI[B]:

      // Handle on the file's contents. It is null when [[filename]] does
      // not name an existing regular file; iteration is then empty.
      val file   =                 // File handle
        if !Files.isRegularFile(Paths.get(filename)) then null
        else scala.io.Source.fromFile(filename)

      // Close the underlying source, if one was opened.
      override def close(): Unit  = if file != null then file.close()

      // [[n]] and [[hd]] are declared before [[it]], which reads and
      // updates them.  [[n]] is the index handed to the parser for the
      // next entry; it is incremented after each successful parse.
      var n = 0                       // Position of current item

      var hd: B = _                   // Most-recent parsed item

      // The actual iterator: a [[CBI]] wrapped around a plain iterator
      // that parses the file lazily, one item per call to [[hasNext]].
      val it = CBI {
        new Iterator[B] {
          val parse = parser(filename)  // Parser initialized
          val ois =                     // Remaining lines in file
            if file == null then Iterator() 
            else file.getLines().map(_.trim) 
  
          // Try to parse the next item into [[hd]].  Returns true on
          // success.  End-of-input style exceptions (EOFException,
          // NoSuchElementException, IOException) close the file and
          // yield false; any other throwable closes the file and is
          // rethrown.
          def fetch(): Bool =           // Read next item 
            try { hd = parse(ois, n); n = n + 1; true } 
            catch 
              case e: EOFException => { close(); false }
              case e: NoSuchElementException => { close(); false }
              case e: IOException => { close(); false }
              case e: Throwable => { close(); throw e }

          // NOTE(review): hasNext is not idempotent; every call parses
          // one more item.  Presumably the CBI wrapper calls hasNext
          // exactly once per item and buffers the result -- confirm.
          def hasNext = fetch()
          def next() = hd       
        }
      }

      // The [[CBI]] operations below simply delegate to [[it]].
      @inline def atEnd = it.atEnd
      @inline def shift() = it.shift()
      @inline def discard() = it.discard()
      @inline def rewind() = it.rewind()
      @inline def b = it.b
      @inline def bs() = it.bs()
    
      

    object FileIterator:

      /** Convert an [[Iterator]] to a [[FileIterator]]: the items of
       *  [[it]] are first written to a file using [[formatter]], and
       *  the resulting file is then opened with [[parser]].
       *
       *  @param it        the items to write.
       *  @param formatter formats each item as a line of the file.
       *  @param parser    parses the file back into items.
       *  @param filename  name of the file; "" requests a temporary file.
       *  @param folder    folder in which the file is placed.
       *  @return a [[FileIterator]] on the file just written.
       */

      def apply[B](
             it: Iterator[B],
             formatter: FORMATTER[B] = OFile.defaultFormatter[B],
             parser: PARSER[B] = OFile.defaultParser[B],
             filename: String = "", 
             folder: String = TMP)
           : FileIterator[B] =
        val afile = tofile(it, formatter, filename, folder)
        FileIterator(afile, parser)


      /** Write an [[Iterator]] to a file, one formatted item per line.
       *
       *  The writer is closed even if the formatter, the iterator or
       *  the write itself throws, so a failure does not leak the file
       *  handle.  The exception is still propagated to the caller.
       *
       *  @param it        the items to write; it is consumed.
       *  @param formatter formats each item; it is first initialized
       *                   with the resolved file name, then called with
       *                   each item and its 0-based position.
       *  @param filename  name of the file; "" requests a temporary file.
       *  @param folder    folder in which the file is placed.
       *  @return the name of the file written, as produced by
       *          [[OFile.mkFileName]].
       */
    
      def tofile[B](
             it: Iterator[B],
             formatter: FORMATTER[B] = OFile.defaultFormatter[B],
             filename: String = "", 
             folder: String = TMP)
          : String = 

        // Resolve the file name and initialize the formatter with it.
        val fname  = OFile.mkFileName(filename, folder).toString
        val format = formatter(fname)    

        // Open the file and write the items in [[it]] to it.
        val oos = new BufferedWriter(new PrintWriter(new File(fname)))
        try
          var n = 0
          while it.hasNext do
            oos.write(format(it.next(), n))
            oos.newLine()
            n = n + 1
            // Flush periodically so a long stream reaches disk as it goes.
            if n % 10000 == 0 then oos.flush()
          oos.flush()
        finally
          // Always release the file handle, also on failure.
          oos.close()

        // Return the file name.
        fname
  

    end FileIterator



    /** Extension methods to provide nicer/more convenient syntax
     *  for converting between file names and iterators.
     *
     *  [[filename.fiterator(parser)]] converts [[filename]], 
     *  a presumed file name, into a [[FileIterator]].
     *
     *  [[it.tofile(formatter, filename, folder)]] writes [[it]] to 
     *  a file [[filename]] and returns the name of the file written.
     */
  
    extension [B](filename: String)
      // Open the file named by this string as a [[FileIterator]], using
      // [[parser]] (the generic default parser if none is given).
      def fiterator(parser: PARSER[B] = OFile.defaultParser[B]): FileIterator[B] =
        FileIterator[B](filename, parser)


    extension [B](it: Iterator[B])
      // Write this iterator to a file by delegating to
      // [[FileIterator.tofile]]; returns the name of the file written.
      def tofile(
             formatter: FORMATTER[B] = OFile.defaultFormatter[B],
             filename: String = "", 
             folder: String = TMP)
          : String = 
        FileIterator.tofile(it, formatter, filename, folder)



    /** [[OFile(key, parser, formatter)]] represents a large possibly
     *  transient ordered collection [[OColl]], e.g. a dynamically
     *  produced data stream. The collection is assumed ordered by [[key]].
     *  Items in the collection can be written to a file using [[formatter]]
     *  and read back using [[parser]] as needed. This is useful, e.g.,
     *  when collection has to be sorted.
     */

    sealed trait OFile[B,K] extends OColl[B,K]:
  
      /** To be provided by inheriting instances:
       */

      // The key by which this collection is assumed to be ordered.
      val key: Key[B,K]

      // Name of the underlying file.
      val filename = "" 
        // Set [[filename]] to "" if [[OFile]] is
        // a transient collection; i.e., not on disk.

      // The items of the collection when it is not on disk.
      def elems: IterableOnce[B] = null
        // Set [[elems]] to null if [[OFile]] is on disk.

      // Parser, formatter and destructor used to read, write and
      // delete the underlying file.
      def parser: PARSER[B]
      def formatter: FORMATTER[B]
      def destructor: DESTRUCTOR[B]


      /** [[OFile]] has three different characteristics:
       *  - [[ODisk]], the file is on disk. 
       *  - [[OMemory]], the file is materialized in memory. 
       *  - [[OTransient]], the file is transient (i.e., still being produced.)
       */

      // Collections derived from this one are [[OFile]]s on the same
      // item type [[B]], possibly with a different key type [[N]].
      type OCOL[N] = OFile[B,N]

      // Build a derived collection keyed by [[kB]]: an in-memory
      // [[OMemory]] if [[bs]] is an [[Iterable]], otherwise a
      // transient [[OTransient]].
      def OCOL[N](bs: IterableOnce[B], kB: Key[B,N]): OCOL[N] =
        bs match
          case bi: Iterable[B] => omemory(bi, kB)
          case _ => oitr(bs, kB)

      // The three constructors below reuse this collection's parser,
      // formatter and destructor.

      // Transient collection over [[bs]].
      def oitr[N](bs: IterableOnce[B], kB: Key[B,N]) =
        OTransient(kB, bs, parser, formatter, destructor)
 
      // On-disk collection backed by the file [[filename]].
      def odisk[N](filename: String, kB: Key[B,N]) =
        ODisk(kB, filename, parser, formatter, destructor)

      // In-memory collection over [[bs]].
      def omemory[N](bs: Iterable[B], kB: Key[B,N]) =
        OMemory(kB, bs, parser, formatter, destructor)


      /** An [[OFile]] may be associated with a temporary file, 
       *  which needs to be destroyed / garbage collected after use.
       */

      var protect = false 
        // If true, protect this [[OFile]] from destruction

      // Set the protection flag (default: turn protection on) and
      // return this [[OFile]], so that calls can be chained.
      def protection(flag: Bool = true): this.type = 
        protect = flag
        this


      /** Turn on protection by default if this [[OFile]] is on disk,
       *  i.e. if it has a non-empty [[filename]]. This statement runs
       *  as part of the trait's initialization.
       */

      protection(filename != "")


      /** Delete the underlying file, and releasing resources.
       */

      // Apply this collection's [[destructor]] to itself.
      def destruct(): Unit = destructor(this)

      // Release this collection unless it is protected: run the
      // destructor, then close every collection in [[held]] and
      // empty [[held]].  Does nothing when [[protect]] is true.
      // NOTE(review): [[held]] is not defined in this file;
      // presumably inherited from [[OColl]] -- confirm.
      override def close(): Unit = 
        if !protect then
          destruct()
          held.foreach { _.close() }
          held = Seq()


      /** For [[OFile]] on disk, [[elems]] is null.
       *  Need to open and read the file as needed.
       */

      // Open [[filename]] with [[parser]]; each call creates a fresh
      // [[FileIterator]] on the file.
      override def fileElems: CBI[B] = FileIterator[B](filename, parser)


      /** [[OFile]] may be a very large collection. Cannot do 
       *  in-memory sorting. We split it into a few smaller files 
       *  and do on-disk merge sort.
       *
       *  @param ky    is the new sorting key.
       *  @param bufSz is the # of items per smaller file.
       */

      var bufSz = 50000    // Default # of items per small file.

      // Change the default chunk size and return this [[OFile]],
      // so that calls can be chained.
      def setBufSz(n: Int) = { bufSz = n; this }


      // Re-key this collection by [[ky]] without sorting it: an on-disk
      // collection is re-wrapped around the same file; otherwise
      // [[elems]] is re-wrapped via [[OCOL]].
      // NOTE(review): .use(this) is not defined in this file; presumably
      // it registers this collection as a resource held by the result
      // -- confirm against [[OColl]].
      override def assumeOrderedBy[N](ky: Key[B,N]): OCOL[N] = 
        if filename != "" then odisk(filename, ky).use(this)
        else OCOL(elems, ky).use(this)


      // Sort by [[ky]], using the default chunk size [[bufSz]].
      override def orderedBy[N](ky: Key[B,N]): OCOL[N] = orderedBy(ky, bufSz)


      // On-disk merge sort by [[ky]], using chunks of [[bufSz]] items.
      // The result is left unprotected, i.e. it is treated as a
      // temporary that may be destroyed when closed.
      def orderedBy[N](ky: Key[B,N], bufSz: Int): OCOL[N] =
        /** Split big file into smaller sorted files.
         *  Each chunk file is wrapped as an unprotected [[ODisk]].
         */
        val scheduled = 
          for
            g <- cbi.grouped(bufSz)           // Group into bufSz chunks.
            s = OSeq(g, key).orderedBy(ky)    // Sort chunk in memory.
            f = s.cbi.tofile(formatter)       // Write chunk to file.
          yield 
            odisk(f, ky).protection(false)

        val groups = scheduled.toSeq        // Force files to be written now.

        // All items have been copied into chunk files; release this
        // collection (a no-op if it is protected).
        close()

        /** Merge the sorted smaller files into a big sorted file.
         *  With no chunks, the result is an [[ODisk]] with an empty
         *  file name; with one chunk, that chunk is the result.
         */
        val merged = 
          if groups.length == 0 then odisk("", ky)
          else if groups.length == 1 then groups.head 
          else groups.head.mergedWith(groups.tail: _*)

        merged.protection(false)


    
      /** [[OFile]] may be a very large collection. Cannot do in-memory
       *  reversing. We split it into a few smaller files and do on-disk
       *  merge reverse.
       */

      // Reverse this collection, using the default chunk size [[bufSz]].
      override def reversed: OCOL[K] = reversed(bufSz)

      // On-disk reversal, using chunks of [[bufSz]] items.  The result
      // is keyed by [[key.reversed]] and left unprotected.
      def reversed(bufSz: Int): OCOL[K] =
        /** Split big file into smaller files, each reversed in memory.
         *  Each chunk file is wrapped as an unprotected [[ODisk]].
         */
        val scheduled =  
          for 
            g <- cbi.grouped(bufSz)         // Group into bufSz chunks.
            s = OSeq(g, key).reversed       // Reverse chunk in memory.
            f = s.cbi.tofile(formatter)     // Write chunk to file.
          yield 
            odisk(f, key.reversed).protection(false)

        // The chunks themselves are also put in reverse order, so that
        // the last chunk of the original comes first.
        val groups = scheduled.toSeq.reverse // Force files to be written now.

        // All items have been copied into chunk files; release this
        // collection (a no-op if it is protected).
        close()

        /** Merge the reversed smaller files into a big reversed file,
         *  by concatenating the chunks in their new order.
         *  NOTE(review): .userev is not defined in this file; presumably
         *  it registers the chunk files as resources held by the result
         *  -- confirm against [[OColl]].
         */

        val merged =
          if groups.length == 0 then odisk("", key.reversed)
          else if groups.length == 1 then groups.head 
          else oitr(groups.flatMap(_.cbi), key.reversed).userev(groups: _*)

        merged.protection(false)


      /** Slurp a copy of the file into memory
       */

      // Read the whole underlying file into a string, decoding its bytes
      // with the platform default charset.  Returns "" when the file
      // cannot be read (e.g. this collection is not on disk).  Only
      // non-fatal failures are turned into ""; fatal VM errors such as
      // OutOfMemoryError propagate instead of being silently swallowed.
      def slurped: String =
        import scala.util.control.NonFatal
        try new String(Files.readAllBytes(Paths.get(filename)))
        catch { case NonFatal(_) => "" }


      /** Save a copy to a file
       */

      // Save a copy of this collection under [[name]] in [[folder]] and
      // return the copy as an on-disk collection with the same key.
      // This collection is closed afterwards (a no-op if protected).
      def saveAs(name: String, folder: String = TMP): OFile[B,K] = 
        val target = OFile.mkFileName(name, folder)
        val source = Paths.get(filename)
        val saved  =
          if source.toString == "" then
            // No file behind this collection: stream the items
            // directly into the target file.
            odisk(cbi.tofile(formatter, target.toString), key)
          else
            // Backed by a file: copy it, overwriting any existing target.
            Files.copy(source, target, StandardCopyOption.REPLACE_EXISTING)
            odisk(target.toString, key)
        close()
        saved


      /** Serialize the file. If it is already on disk, do nothing.
       *  An [[OColl]] may be associated with a temporary collection,
       *  needs to be destroyed / garbage collected after using it.  
       *  This is complex, as inheriting instances of [[OColl]] can
       *  be a collection on disk, a collection in memory, a transient
       *  collection with different properties and associated resources
       *  that need to be properly closed/terminated.
       */

      // Serialize to a temporary file in the default folder, if needed.
      def serialized: OFile[B,K] = serialize()

      // Make sure this collection is on disk.
      //  - Already on disk and no target name requested: return this.
      //  - Otherwise: save it via [[saveAs]].  The saved file is
      //    protected only when the caller named it; an unnamed
      //    (temporary) file is left unprotected.
      def serialize(name: String = "", folder: String = TMP): OFile[B,K] =
        val onDisk  = filename != ""
        val unnamed = name == ""
        if onDisk && unnamed then this
        else saveAs(name, folder).protection(!unnamed)


      /** Rename the file.
       */

      // Move this collection to [[name]] in [[folder]] and return it as
      // an on-disk collection with the same key.  This collection is
      // closed afterwards (a no-op if protected).
      def renameAs(name: String, folder: String = TMP): OFile[B,K] =
        val target  = OFile.mkFileName(name, folder)
        val current = Paths.get(filename)
        val renamed =
          if current.toString == "" then
            // No file behind this collection: stream the items
            // directly into the target file.
            odisk(cbi.tofile(formatter, target.toString), key)
          else
            // Backed by a file: move it, overwriting any existing target.
            Files.move(current, target, StandardCopyOption.REPLACE_EXISTING)
            odisk(target.toString, key)
        close()
        renamed


      /** Print n items in this [[OFile]]... for debugging purpose :-).
       */

      // Print the first [[n]] items, one per line with a blank line
      // after each.  The iteration runs inside .done, which (per the
      // original author's note) auto-closes the file at the end.
      def printMe(n: Int): Unit =
        cbi.done { items =>
          for item <- items.take(n) do println(s"### $item ###\n")
        }


    object OFile: 

      // Open an existing file as an [[OFile]] ordered by [[key]], with
      // parser/formatter/destructor taken from the given [[HasOFile]].
      // Throws FileNotFoundException if [[filename]] does not exist.
      def apply[B,K]
           (key: Key[B,K], filename: String)
           (using ofile: HasOFile[B]): OFile[B,K] = 
        if !Files.exists(Paths.get(filename)) then
          throw new java.io.FileNotFoundException(filename)
        else
          ODisk(key, filename, ofile.parser, ofile.formatter, ofile.destructor)
        
            
      // Same, with an explicitly supplied parser and formatter.
      def apply[B,K](
            key: Key[B,K], filename: String, 
            parser: PARSER[B], formatter: FORMATTER[B]): OFile[B,K] =
        val enabled = HasOFile(parser, formatter)
        OFile(key, filename)(using enabled)


      // Same, with the key given as an extraction function [[get]];
      // items are taken to be in ascending order of [[get]].
      def apply[B,K]
           (filename: String, get: B => K)
           (using ord: Ord[K], ofile: HasOFile[B]): OFile[B,K] =
        val ascending: Key[B,K] = Key.asc(get)(using ord)
        OFile(ascending, filename)(using ofile)


      // Same, with an extraction function and an explicitly supplied
      // parser and formatter.  The given [[ofile]] is required by the
      // signature but is not used.
      def apply[B,K]
            (filename: String, get: B => K,
             parser: PARSER[B], formatter: FORMATTER[B]) 
            (using ord: Ord[K], ofile: HasOFile[B]): OFile[B,K] =
        val ascending: Key[B,K] = Key.asc(get)(using ord)
        val enabled = HasOFile(parser, formatter)
        OFile(ascending, filename)(using enabled)



      /** Function for making filenames in a consistent manner.
       */

      // Build the path of a file called [[name]] inside [[folder]].
      //
      // folder: "/tmp" or "" selects the JVM temporary directory
      //         (system property java.io.tmpdir); "." selects the
      //         absolute current working directory; any other value is
      //         used as given, and the directory is created if needed.
      // name:   "" requests a fresh temporary file, which is created
      //         (empty) inside the folder as a side effect.
      //
      // The path returned by createTempFile already includes the
      // folder, so it is returned as is.  Resolving it against the
      // folder again would duplicate the folder prefix whenever the
      // folder is a relative path.
      def mkFileName(
            name: String = "", 
            folder: String = TMP): java.nio.file.Path =
        val dir: java.nio.file.Path = folder match
          case "/tmp" | "" => Paths.get(System.getProperty("java.io.tmpdir")) 
          case "."         => Paths.get(".").toAbsolutePath.getParent
          case d           => Files.createDirectories(Paths.get(d))
        if name == "" then Files.createTempFile(dir, "ofile", ".tmp")
        else dir.resolve(name)


      /** Function for deleting an underlying file.
       *  Shallow deletion is implemented here.
       */

      // Delete the file behind [[ofile]], if it has one and is not
      // protected.  Deletion is best effort: a non-fatal failure is
      // reported through the debug message facility rather than thrown.
      // Fatal VM errors are no longer swallowed.
      def destructorOFile[B](ofile: OFile[B,_]): Unit =
        import dbmodel.DEBUG.message
        import scala.util.control.NonFatal
        val filename = ofile.filename
        if filename != "" && !ofile.protect then
          try { Files.deleteIfExists(Paths.get(filename)) }
          catch {
            case NonFatal(_) =>
              message(s"**** OFile.destruct: Cannot delete file $filename")
          }


      /** Default parser and unparser. These are generic and inefficient.
       *  User should supply parsers and unparsers for their files.
       *  The default parser/unparser ignore position info.
       */

      // Generic formatter: Java-serialize [[b]] and return the bytes as
      // a single-line Base64 string.  [[options]] and [[position]] are
      // ignored.
      def defaultFormatter[B]
            (options: String)
            (b:B, position:Int = 0): String =
        val buffer = new ByteArrayOutputStream()
        val out    = new ObjectOutputStream(buffer)
        out.writeObject(b)
        out.close()
        Base64.getEncoder().encodeToString(buffer.toByteArray)
 

      // Generic parser, the inverse of the default formatter: take the
      // next line of [[es]], Base64-decode it and Java-deserialize the
      // result.  Throws EOFException when [[es]] has no more lines.
      // [[options]] and [[position]] are ignored.
      def defaultParser[B]
            (options: String)
            (es: Iterator[String], position: Int = 0): B =
        if !es.hasNext then throw new EOFException
        val line    = es.next().trim
        val decoded = Base64.getDecoder().decode(line.getBytes(UTF_8))
        val in      = new ObjectInputStream(new ByteArrayInputStream(decoded))
        in.readObject().asInstanceOf[B]

    end OFile



    /** Trait for type [[B]] to be [[OFile]]-enabled.
     *  If you want to use the default parser/formatter,
     *  you can enable it for a specific type [[T]] by:
     *
     {{{ given HasOFile[T] = new HasOFile[T] { } }}}
     *
     */

    trait HasOFile[B]:
      // Each member defaults to the generic implementation in [[OFile]];
      // instances may override any of them.
      val parser: PARSER[B] = OFile.defaultParser
      val formatter: FORMATTER[B] = OFile.defaultFormatter
      // Default destructor: shallow deletion of the underlying file.
      val destructor: DESTRUCTOR[B] = OFile.destructorOFile
 
    
    object HasOFile:

      /** Build a [[HasOFile]] instance from a user-supplied parser
       *  and formatter; either one falls back to the generic default
       *  when omitted. The destructor is the trait's default.
       */

      def apply[B](
            aParser: PARSER[B] = OFile.defaultParser, 
            aFormatter: FORMATTER[B] = OFile.defaultFormatter): HasOFile[B] =
        new HasOFile[B]:
          override val parser: PARSER[B] = aParser
          override val formatter: FORMATTER[B] = aFormatter

    end HasOFile


    /** [[OTransient(key, ei, parser, formatter)]] represents 
     *  an iterator [[ei]] as an ordered collection which can be
     *  written to a file using the [[formatter]], and read back 
     *  using the [[parser]].
     */

    // Transient collection: [[elems]] may be any [[IterableOnce]];
    // [[filename]] keeps its default "".
    // NOTE(review): init() is not defined in this file; presumably
    // inherited from [[OColl]] -- confirm.
    case class OTransient[B,K](
        override val key: Key[B,K],
        override val elems: IterableOnce[B],
        override val parser: PARSER[B],
        override val formatter: FORMATTER[B],
        override val destructor: DESTRUCTOR[B]) 
      extends OFile[B,K] { init() }


    /** [[OMemory(key, es, parser, formatter)]] represents a [[Seq]]
     *  [[es]] as an ordered collection which can be written to a file
     *  using the [[formatter]], and read back in using the [[parser]].
     */

    // In-memory collection: [[elems]] is an [[Iterable]];
    // [[filename]] keeps its default "".
    // NOTE(review): init() is not defined in this file; presumably
    // inherited from [[OColl]] -- confirm.
    case class OMemory[B,K](
        override val key: Key[B,K],
        override val elems: Iterable[B],
        override val parser: PARSER[B],
        override val formatter: FORMATTER[B],
        override val destructor: DESTRUCTOR[B]) 
      extends OFile[B,K] { init() }


    /** [[ODisk(key, filename, parser, formatter)]] represents
     *  the file [[filename]] as an ordered collection [[OFile]].
     *  Items in the File are parsed using [[parser]] on demand 
     *  as the [[OFile]] is accessed. Items in the [[OFile]] are
     *  written out using [[formatter]] as required; this is useful,
     *  e.g., when the file has to be sorted.  The file is assumed
     *  to be already ordered by a [[key]].
     */

    // On-disk collection: [[filename]] names the backing file;
    // [[elems]] keeps its default null.
    // NOTE(review): init() is not defined in this file; presumably
    // inherited from [[OColl]] -- confirm.
    case class ODisk[B,K](
        override val key: Key[B,K],
        override val filename: String, 
        override val parser: PARSER[B],
        override val formatter: FORMATTER[B],
        override val destructor: DESTRUCTOR[B])
      extends OFile[B,K] { init() }


    /** Offer simple ways to convert between [[OSeq]] and [[OFile]]
     */

    extension [B,K](it: OSeq[B,K])(using ofile: HasOFile[B])
      // View this [[OSeq]] as a transient [[OFile]] with the same key,
      // using the parser/formatter/destructor of the given [[HasOFile]].
      def transientOFile: OFile[B,K] =
        OTransient(
          key        = it.key,
          elems      = it.cbi,
          parser     = ofile.parser,
          formatter  = ofile.formatter,
          destructor = ofile.destructor)


    extension [B](it: IterableOnce[B])(using ofile: HasOFile[B])
      // View this collection as a transient [[OFile]] with a trivial
      // key: every item has key ().
      def transientOFile: OFile[B,Unit] =
        OTransient(
          key        = Key.asc((_: B) => ()),
          elems      = it,
          parser     = ofile.parser,
          formatter  = ofile.formatter,
          destructor = ofile.destructor)

      // View this collection as a transient [[OFile]] assumed to be
      // ordered by the given [[key]].
      def transientOFile[K](key: Key[B,K]): OFile[B,K] =
        OTransient(
          key,
          it,
          parser     = ofile.parser,
          formatter  = ofile.formatter,
          destructor = ofile.destructor)

      // View this collection as a transient [[OFile]] assumed to be
      // in ascending order of the extraction function [[key]].
      def transientOFile[K:Ord](key: B => K): OFile[B,K] =
        OTransient(
          Key.asc(key),
          it,
          parser     = ofile.parser,
          formatter  = ofile.formatter,
          destructor = ofile.destructor)


  end DBFile




/** Examples *****************************************************
 *
 {{{


   import dbmodel.OrderedCollection.{ given, * }
   import dbmodel.Predicates.{ given, * }
   import dbmodel.DBSQL.{ given, * }
   import dbmodel.DBFile.*
   import scala.language.implicitConversions


// Create some data... phone models


   case class Phone(model: String, price: Int, brand: String)


   object PHONE {
     // convert Phone into a triple (m, p, b), for writing to file
     def unapply(p: Phone) = (p.model, p.price, p.brand)

     // convert a triple (m, p, b) into a Phone, for reading from file
     def tupled(p: (String, Int, String)) = 
       val (model, price, brand) = p
       Phone(model, price, brand)
   }


   val s21   = Phone("S21", 1000, "Samsung")
   val a52   = Phone("A52", 550, "Samsung")
   val a32   = Phone("A32", 350, "Samsung")
   val n10   = Phone("N10", 360, "OnePlus")
   val a94   = Phone("A94", 400, "Oppo")
   val m40   = Phone("Mate 40", 1200, "Huawei")
   val pix5  = Phone("Pixel 5", 1300, "Google")
   val pix4  = Phone("Pixel 4", 500, "Google")


// Phone models can be ordered by brand or by price


   val kPrice = Key.asc[Phone,Int](x => x.price)
   val kBrand = Key.asc[Phone,String](x => x.brand)


// Store into a tmp file, sorted by brand
// using default formatter and parser:


   val formatter = (_: String) => (x:Phone, n: Int) => 
     OFile.defaultFormatter("")(PHONE.unapply(x), n)


   val parser = (_: String) => (xs: Iterator[String], n: Int) => 
     PHONE.tupled(OFile.defaultParser("")(xs))



// Here is one way to read/write a phone file, 
// using parser/formatter implicitly.


   given HasOFile[Phone] = HasOFile[Phone](parser, formatter)


   Vector(s21, a52).orderedBy(kBrand).transientOFile.saveAs("zzzz")

   val zzzz = OFile(kBrand, "zzzz") 

   zzzz(1)

   zzzz.protection(false).close()



// Here is another way to read/write a phone file,
// using parser/formatter explicitly.


   val fname = {
     val vs = Vector(s21, a52, a32, n10,a94, m40, pix5, pix4)
     vs orderedBy kBrand 
   }.tofile(formatter, "wlstest")

   val OByBrand = OFile(fname, (x:Phone) => x.brand, parser, formatter)


// Phones grouped by brands


   for bps <- OByBrand.clustered 
   do println(s"brand = ${bps._1}\n  ${bps._2}\n")


// Phones, re-sorted by price 


   val OByPrice = OByBrand orderedBy kPrice

   OByPrice.protection(true)  // Protect file from deletion


// Phones and their price competitors from
// other brands within +/- $150.
// Non-equijoin alert!


   for 
     (p, cs) <- OByPrice join OByPrice on DLEQ(150)
     ob = cs.filter(_.brand != p.brand) 
   do 
     println(s"${p}\n  ${ob}\n") 


// Samsung phones and their price competitors 
// from Google, within 20%.
// Non-equijoin alert!


   for ((p, cs) <- OByPrice.filter(_.brand == "Samsung")
                     join OByPrice.filter(_.brand == "Google")
                     on SZPERCENT(1.2))
   do 
     println(s"${p}\n  ${cs}\n") 


// Clean up


   OByBrand.protection(false)    // Allow file to be deleted.
   OByPrice.protection(false)    // Allow file to be deleted.
   OByPrice.close()              // Delete both files. Note that close()
   OByBrand.close()              // only deletes unprotected files.

 }}}
 *
 ****************************************************************/




