

package proteomics

  /** Version 11.1, to be used with dbmodel.TSVModel Version 11.1.
   *
   *  Wong Limsoon
   *  14 May 2023
   */


  object CORUMModel:

    /** A simple model that turns CORUM protein complex files
     *  into an ordered collection.
     */

    import dbmodel.Synchronizable.CBI
    import dbmodel.OrderedCollection.{ OColl, Key }
    import dbmodel.DBFile.{ OFile, OTransient, PARSER, FORMATTER }
    import dbmodel.TSVModel.{ *, given }
    import dbmodel.TSVModel.FIELDTYPE.*
    import scala.deriving.Mirror
    import dbmodel.DBFile.TMP    // Default folder to use.

    // Type of a complex identifier; CorumEntry.complexID and the default
    // file key (CorumSupport.corumFileKey) both use it.
    type ID   = Int
    // Shorthand for Boolean.
    type Bool = Boolean


    // Aliases that specialise the dbmodel types to CorumEntry.
    // NOTE(review): CBI and OFile are defined in dbmodel; their exact
    // semantics are not visible from this file -- confirm there.
    type CORUMITERATOR  = CBI[CorumEntry]
    // CORUM file keyed by complex ID.
    type CORUMFILE      = OFile[CorumEntry,ID]
    // CORUM file keyed by an arbitrary key type K.
    type CORUM[K]       = OFile[CorumEntry,K]


    /** One GO annotation of a complex: an identifier and its description.
     *
     *  @param id           value taken from the "GO ID" column
     *  @param description  value taken from the "GO description" column
     */
    case class GO(
      id: String,
      description: String)

    /** One FunCat annotation of a complex: an identifier and its description.
     *
     *  @param id           value taken from the "FunCat ID" column
     *  @param description  value taken from the "FunCat description" column
     */
    case class FunCat(
      id: String,
      description: String)

    /** One subunit (member protein) of a complex.
     *
     *  @param uniprot       entry from the "subunits(UniProt IDs)" column
     *  @param entrez        entry from the "subunits(Entrez IDs)" column
     *  @param gene          entry from the "subunits(Gene name)" column
     *  @param geneSynonyms  entry from the "subunits(Gene name syn)" column
     *  @param protein       entry from the "subunits(Protein name)" column
     */
    case class SubUnit(
      uniprot: String,
      entrez: String,
      gene: String,
      geneSynonyms: String,
      protein: String)


    /** One protein complex record of a CORUM file.
     *
     *  The three list-valued fields (subunits, go, funcat) are stored in the
     *  file as several parallel ";"-separated columns; CorumSupport converts
     *  between the two representations.  The helper methods below project a
     *  single attribute out of each list, preserving list order.
     */
    case class CorumEntry(
          complexID: ID,
          complexName: String,
          organism: String,
          synonyms: String,
          cellLine: String,
          subunits: List[SubUnit],
          purification: String,
          go: List[GO],
          funcat: List[FunCat],
          pubmed: Int,
          subunitComment: String,
          complexComment: String,
          diseaseComment: String,
          swissprotOrganism: String):

      // Projections of the GO annotations.
      def goIDs: List[String]              = go.map(_.id)
      def goDescriptions: List[String]     = go.map(_.description)

      // Projections of the FunCat annotations.
      def funcatIDs: List[String]          = funcat.map(_.id)
      def funcatDescriptions: List[String] = funcat.map(_.description)

      // Projections of the subunits, one per subunit attribute.
      def subunitsUniprot: List[String]    = subunits.map(_.uniprot)
      def subunitsEntrez: List[String]     = subunits.map(_.entrez)
      def subunitsGene: List[String]       = subunits.map(_.gene)
      def subunitsSynonyms: List[String]   = subunits.map(_.geneSynonyms)
      def subunitsProtein: List[String]    = subunits.map(_.protein)

      // Column layout shared by every entry.
      // NOTE(review): as a val this is one extra field per instance; a def
      // would avoid that -- confirm no caller needs it to be a stable val.
      val schema = CorumSupport.schema


    // TSV file support for CorumEntry, built from the schema, encoder and
    // decoder defined in CorumSupport.  The examples at the end of this file
    // use it as CorumFile(filename) to open a CORUM file.
    // NOTE(review): TSVFile comes from dbmodel.TSVModel; see there for what
    // the resulting value offers beyond the usages shown in this file.
    val CorumFile = TSVFile[CorumEntry](CorumSupport)


    /** Here is how you can convert an
     *  `es: IterableOnce[CorumEntry]` into a [[CORUMFILE]]:
     *
     {{{ es.transientTSVFile(CorumSupport.corumFileKey)(using CorumFile) }}}
     *
     */


    object CorumSupport extends TSVSchema[CorumEntry]:

      /** Schema for CORUM file: column name and field type, in file order.
       *
       *  The position of each pair is the column index that `encoder` writes
       *  to and `make` reads from, so the order here must stay in step with
       *  both of them.
       */

      val schema = Vector(
        "ComplexID" -> INT,                                //  0
        "ComplexName" -> STRING,                           //  1
        "Organism" -> STRING,                              //  2
        "Synonyms" -> STRING,                              //  3
        "Cell line" -> STRING,                             //  4
        "subunits(UniProt IDs)" -> STRING,                 //  5
        "subunits(Entrez IDs)" -> STRING,                  //  6
        "Protein complex purification method" -> STRING,   //  7
        "GO ID" -> STRING,                                 //  8
        "GO description" -> STRING,                        //  9
        "FunCat ID" -> STRING,                             // 10
        "FunCat description" -> STRING,                    // 11
        "subunits(Gene name)" -> STRING,                   // 12
        "Subunits comment" -> STRING,                      // 13
        "PubMed ID" -> INT,                                // 14
        "Complex comment" -> STRING,                       // 15
        "Disease comment" -> STRING,                       // 16
        "SWISSPROT organism" -> STRING,                    // 17
        "subunits(Gene name syn)" -> STRING,               // 18
        "subunits(Protein name)" -> STRING)                // 19


      /** Encode the GO info of an entry as two TSV fields.
       *
       *  @param corum  the entry whose GO annotations are serialised
       *  @return       the pair (IDs, descriptions); each component is the
       *                corresponding values joined with ";" in list order
       */

      def encodeGO(corum: CorumEntry): (String,String) =
        val ids          = corum.goIDs.mkString(";")
        val descriptions = corum.goDescriptions.mkString(";")
        (ids, descriptions)

      /** Decode the GO info of an entry from its two TSV fields.
       *
       *  The i-th ID is paired with the i-th description.  If the two fields
       *  hold a different number of values, the surplus values of the longer
       *  one are dropped (zip semantics).
       *
       *  @param i  ";"-separated GO IDs
       *  @param d  ";"-separated GO descriptions
       *  @return   the decoded annotations; empty when both fields are empty
       */
      def decodeGO(i: String, d: String): List[GO] =
        // "".split(";") is Array(""), not an empty array.  Without this
        // guard an entry with no GO terms would decode to List(GO("", ""))
        // and encodeGO / decodeGO would not round-trip the empty list.
        if i.isEmpty && d.isEmpty then Nil
        else
          val ids          = i.split(";")
          val descriptions = d.split(";")
          ids.zip(descriptions)
            .map { case (id, description) => GO(id, description) }
            .toList


      /** Encode the FunCat info of an entry as two TSV fields.
       *
       *  @param corum  the entry whose FunCat annotations are serialised
       *  @return       the pair (IDs, descriptions); each component is the
       *                corresponding values joined with ";" in list order
       */

      def encodeFunCat(corum: CorumEntry): (String,String) =
        val ids          = corum.funcatIDs.mkString(";")
        val descriptions = corum.funcatDescriptions.mkString(";")
        (ids, descriptions)

      /** Decode the FunCat info of an entry from its two TSV fields.
       *
       *  The i-th ID is paired with the i-th description.  If the two fields
       *  hold a different number of values, the surplus values of the longer
       *  one are dropped (zip semantics).
       *
       *  @param i  ";"-separated FunCat IDs
       *  @param d  ";"-separated FunCat descriptions
       *  @return   the decoded annotations; empty when both fields are empty
       */
      def decodeFunCat(i: String, d: String): List[FunCat] =
        // "".split(";") is Array(""), not an empty array.  Without this
        // guard an entry with no FunCat terms would decode to
        // List(FunCat("", "")) and the empty list would not round-trip.
        if i.isEmpty && d.isEmpty then Nil
        else
          val ids          = i.split(";")
          val descriptions = d.split(";")
          ids.zip(descriptions)
            .map { case (id, description) => FunCat(id, description) }
            .toList


      /** Encode the subunit info of an entry as five TSV fields.
       *
       *  @param corum  the entry whose subunits are serialised
       *  @return       (UniProt IDs, Entrez IDs, gene names, gene synonyms,
       *                protein names); each component joins one attribute of
       *                all subunits with ";" in list order
       */

      def encodeSub(corum: CorumEntry): (String,String,String,String,String) =
        def joined(values: List[String]): String = values.mkString(";")
        (joined(corum.subunitsUniprot),
         joined(corum.subunitsEntrez),
         joined(corum.subunitsGene),
         joined(corum.subunitsSynonyms),
         joined(corum.subunitsProtein))

      /** Decode the subunits of an entry from their five TSV fields.
       *
       *  The i-th subunit takes the i-th value of every field.  The number
       *  of subunits is the largest number of values found in any field;
       *  fields with fewer values contribute "" for the missing positions.
       *
       *  @param u  ";"-separated UniProt IDs
       *  @param e  ";"-separated Entrez IDs
       *  @param g  ";"-separated gene names
       *  @param s  ";"-separated gene name synonyms
       *  @param p  ";"-separated protein names
       *  @return   the decoded subunits; empty when all five fields are empty
       */
      def decodeSub(u: String, e: String, g: String, s: String, p: String)
          : List[SubUnit] = {
        // "".split(";") is Array(""), so an empty field must be treated
        // explicitly as "no values".  Otherwise five empty fields would
        // decode to one all-blank SubUnit instead of an empty list.
        def values(field: String): Array[String] =
          if field.isEmpty then Array.empty[String] else field.split(";")

        val uniprot = values(u)
        val entrez  = values(e)
        val gene    = values(g)
        val synonym = values(s)
        val protein = values(p)

        // Value at position i, or "" when the field is shorter than that.
        def at(column: Array[String], i: Int): String =
          if i < column.length then column(i) else ""

        val count =
          List(uniprot, entrez, gene, synonym, protein).map(_.length).max

        List.tabulate(count) { i =>
          SubUnit(at(uniprot, i), at(entrez, i), at(gene, i),
                  at(synonym, i), at(protein, i))
        }
      }


      /** CORUM-to-TSV encoder.
       *
       *  Produces the field values of one entry in the column order given
       *  by `schema`; the list-valued fields are flattened with encodeSub,
       *  encodeFunCat and encodeGO.
       */

      val encoder: ENCODER[CorumEntry] = (corum: CorumEntry) =>
        val (uniprot, entrez, gene, synonym, protein) = encodeSub(corum)
        val (funcatID, funcatDescription)             = encodeFunCat(corum)
        val (goID, goDescription)                     = encodeGO(corum)
        Array(
          corum.complexID,            //  0 ComplexID
          corum.complexName,          //  1 ComplexName
          corum.organism,             //  2 Organism
          corum.synonyms,             //  3 Synonyms
          corum.cellLine,             //  4 Cell line
          uniprot,                    //  5 subunits(UniProt IDs)
          entrez,                     //  6 subunits(Entrez IDs)
          corum.purification,         //  7 Protein complex purification method
          goID,                       //  8 GO ID
          goDescription,              //  9 GO description
          funcatID,                   // 10 FunCat ID
          funcatDescription,          // 11 FunCat description
          gene,                       // 12 subunits(Gene name)
          corum.subunitComment,       // 13 Subunits comment
          corum.pubmed,               // 14 PubMed ID
          corum.complexComment,       // 15 Complex comment
          corum.diseaseComment,       // 16 Disease comment
          corum.swissprotOrganism,    // 17 SWISSPROT organism
          synonym,                    // 18 subunits(Gene name syn)
          protein)                    // 19 subunits(Protein name)



      /** TSV-to-CORUM decoder.
       *
       *  Builds one entry from a TSV record.  Columns are addressed by their
       *  position in `schema`; the list-valued fields are rebuilt with
       *  decodeGO, decodeFunCat and decodeSub.
       *
       *  @param tsv  the record to decode
       *  @return     the corresponding CorumEntry
       */

      def make(tsv: TSV): CorumEntry =
        // The columns are read in the same sequence as before, so the order
        // in which the record's accessors are called is unchanged.
        val id            = tsv.int(0)
        val name          = tsv.str(1)
        val species       = tsv.str(2)
        val otherNames    = tsv.str(3)
        val cells         = tsv.str(4)
        val method        = tsv.str(7)
        val goTerms       = decodeGO(tsv.str(8), tsv.str(9))
        val funcatTerms   = decodeFunCat(tsv.str(10), tsv.str(11))
        val subunitNote   = tsv.str(13)
        val pubmedID      = tsv.int(14)
        val complexNote   = tsv.str(15)
        val diseaseNote   = tsv.str(16)
        val swissprot     = tsv.str(17)
        val members       = decodeSub(tsv.str(5), tsv.str(6), tsv.str(12),
                                      tsv.str(18), tsv.str(19))
        CorumEntry(
          complexID         = id,
          complexName       = name,
          organism          = species,
          synonyms          = otherNames,
          cellLine          = cells,
          subunits          = members,
          purification      = method,
          go                = goTerms,
          funcat            = funcatTerms,
          pubmed            = pubmedID,
          subunitComment    = subunitNote,
          complexComment    = complexNote,
          diseaseComment    = diseaseNote,
          swissprotOrganism = swissprot)


      /** Key for Corum file: the complex ID of an entry, compared with the
       *  standard ordering on Int.
       */

      val corumFileKey: Key[CorumEntry,ID] =
        Key(entry => entry.complexID, Ordering[ID])

    end CorumSupport

  end CORUMModel




/** Examples ***********************************************
 *
{{{

    import proteomics.CORUMModel.*
    import scala.language.implicitConversions

    val corum = CorumFile("test-proinfer/complexes.txt")

    corum(9).subunits

    corum.length

    corum.filter( _.subunits.length > 5 ).saveAs("xx")

    val xx = CorumFile("xx")

    xx(8).subunits.length

    xx(8).goIDs

    xx(8).subunitsUniprot

    xx(8).subunitsEntrez

    xx.protection(false).close()

}}}
 *
 */



