/*
 *  CHROMOSOME INVARIANT (the gene invariants are assumed to hold as well--and it is up to
 *  its implementation to maintain the invariants)
 *  1) The joinNumbers of all of the genes in a chromosome are distinct. Because a query
 *     cannot have to joins that are exactly equal (in terms of the outer an inner relations)
 *     we are guaranteed that we will never encode a plan that has repeated joinNumbers. So
 *     this invariant is simply a reflection of this aforementioned fact.
 *  2) Crossover is only defined on chromosomes that have the same number of genes and that
 *     none of the genes have mutually exclusive joinNumbers and that the number of genes
 *     in a chromosome is at least 3
 *  3) Mutation is only defined on a chromosome that has at least 2 genes
 */

#include <cstdlib>
#include <cassert>
#include <iostream>
#include <fstream>
#include <string>
#include "chromosome.h"
#include "GAutility.h"
#include "gene.h"


//===============================CONSTRUCTORS=========================================

/*  Chromosome
 *  Create a chromosome containing a single default gene
 */
Chromosome::Chromosome()
{
  chromosome.empty();
}

/*  Chromosome 
 *  Create a chromosome that is the same as the given vector of genes.  We must take
 *  care to preserve CHROMOSOME INVARIANT #1 so that if we are given a vector of genes
 *  genes in which two or more genes have repeated joinNumbers, then we must not
 *  construct the chromosome (and instead, we have an assrt failure)
 */
Chromosome::Chromosome(vector<Gene> c)
{
  bool matchingJoin = false;
  for (size_t i = 0; i < c.size() - 1; ++i)
  {
	for (size_t j = i + 1; j < c.size() - 1; ++j)
	{
	  if (c[i].getJoinNumber() == c[j].getJoinNumber())
	  {
		// we've found a gene (indexed by j) with the same
		// joinNumber as the gene indexed by i
		matchingJoin = true;
	  }
	}
  } 
  assert (! matchingJoin);

  chromosome = c;
}

//=============================CONSTANT ACCESS FUNCTIONS===============================

/*  size
 *  Returns the size of the chromosome (i.e, the number of genes in the chromosome)
 */
size_t Chromosome::size() const
{
  return chromosome.size();
}

/*  getGene
 *  Returns a gene from the chromosome using an array-style index so that, for
 *  example, c.getGene(4) will retun the 5th gene in the chromosome.
 */
Gene Chromosome::getGene(size_t i) const
{
  // PRECONDITION: i is not greater than the number of elements in the matrix;
  // that is, we won't be accessing the array outside of its boundaries
  assert(i <= chromosome.size() - 1);
  return chromosome[i];
}

/*  findGene
 *  Returns a gene from the chromosome using the joinNumber of the gene. Returns
 *  true iff the given joinNumber exists in the chromosome, and if true, it
 *  updates the given gene g to the gene we've found. Thus, for example,
 *  c.findGene(3, g) will search the chromosome looking for a gene with a 
 *  joinNumber equal to 3. If found, it will set g to this gene and return true,
 *  otherwise it will leave g alone and return false.
 */
bool Chromosome::findGene(unsigned int n, Gene& g) const
{
  for (size_t i = 0; i < chromosome.size(); ++i)
  {
	if (n == chromosome[i].getJoinNumber()) 
	{
	  g = chromosome[i];
	  return true;
	}
 }
  return false;
}

//========================MODIFICATION MEMBER FUNCTION==============================

/*  addGene
 *  Adds a given gene to the chromosome, while still preserving the CHROMOSOME INVARIANT
 *  that a chromosome's genes all must have distinct joinNumbers. This routine checks
 *  to make sure that no gene has a joinNumber equal to that of g, and if this is so,
 *  then it pushes g onto the end of the chromosome and returns true. Otherwise it simply
 *  returns false;
 */
bool Chromosome::addGene(Gene g)
{
  for (size_t i = 0; i < chromosome.size(); ++i)
  {
	if (g.getJoinNumber() == chromosome[i].getJoinNumber()) 
	{
	  // a gene with g's joinNumber already is in the chromosome, so inserting
	  // g would violate the class invariant.
	  return false; 
	}
  }
  chromosome.push_back(g);
  return true;
}

//=============================GENETIC ALGORITHM OPERATORS===============================

/*  mutate
 *  Mutating a chromosome consists of randomly selecting one of the chromosome's genes and
 *  the randomly deciding to do one of the following operations:
 *    0) toggle the gene's orientation
 *    1) swap the positions of two random genes in the chromosome
 */
void mutate(Chromosome& c)
{
  int mmRand = randomInt(0, 1);  // the random number determining which mutation method to use.
								 // this is guaranteed to be either 0 or 1

  int jNum;  // will get the joinNumber of the gene indexed by gRand
  char Orien; // will get the orientation of the gene indexed by gRand

  // do no mutations if the chromosome has fewer than 2 genes: CHROMOSOME INVARIANT 3
  if (c.size() >= 2)
  {
	switch(mmRand)
	{
		case(0):   // choose a gene and alter its joinOrientation 
		{
			int gRand = randomInt(0, c.size() - 1); // the random number determining which gene to alter (in terms of
											        // gene's index number). This is an integer in [0, c.size() - 1]

			jNum = c.getGene(gRand).getJoinNumber();
			Orien = c.getGene(gRand).getOrientation();

			// ensure we won't be trying to manipulate a default-constructed gene
			assert(jNum != 0 && Orien != '\0');

			// alter the joinOrientation, switching a current 'a' to 'r' and vice-versa
			if (Orien == 'a') Orien = 'r';
			else Orien = 'a';
	  
			Chromosome tmp;
			// copy all of c's genes into tmp--except for the gene indexed by gRand, in whose
			// case we use the newly assigned gene values
			for (size_t i = 0; i < c.size(); ++i)
			{
			  if ((int)i == gRand)
			  {
				Gene g(jNum, Orien);
				tmp.addGene(g);
			  }
			  else
			  {
				tmp.addGene(c.getGene(i)); 
			  }
			}
			c = tmp;
			break;
		}
	    case(1): // choose two genes and interchage their positions in the chromosome 
		{
		  int gRandA;
		  int gRandB;
		  do // make sure these will not be equal
		  {
			gRandA = randomInt(0, c.size() - 1); // the random numbers determining which genes to switch (in terms of
			gRandB = randomInt(0, c.size() - 1); // their index numbers). These are an integers in [0, c.size() - 1]
		  }
		  while (gRandA == gRandB);

		  Chromosome tmp;
		  // copy all of c's genes into tmp, except for the genes indexed by gRandA and gRandB, in which case we
		  // copy gRandB's gene into the place of gRandA's gene, and vice-versa, thereby switching each gene
		  // into the other's position
		  for (size_t i = 0; i < c.size(); ++i)
		  {
			if ((int)i == gRandA) tmp.addGene(c.getGene(gRandB));  // copy gRandB into gRandA's place
			else if ((int)i == gRandB) tmp.addGene(c.getGene(gRandA)); // copy gRandA into gRandB's place
			else tmp.addGene(c.getGene(i));  // maintain the position and just copy from c into tmp
		  }
		  c = tmp;
		  break;
		}
	}
  }
}

/*  crossover
 *  Perform crossover on the two given Chromosomes.  We must ensure that we uphold CHROMOSOME
 *  INVARIANT #2, which states that crossover is only defined on chromosomes of the same size,
 *  and that none of the genes have mutually exclusive joinNumbers
 */
bool crossover(Chromosome& c1, Chromosome& c2)
{
  assert(c1.size() == c2.size());

  bool matchingJoin = false;

  // ensure that c1 and c2 all have genes with the same join numbers
  for (size_t i = 0; i < c1.size() - 1; ++i)
  {
	for (size_t j = 0; j < c2.size() - 1; ++j)
	{
	  if (c1.getGene(i).getJoinNumber() == c2.getGene(j).getJoinNumber())
	  {
		// we've found a gene in c2 that has the same join number as a gene in c1
		matchingJoin = true;
	  }
	}
	if (! matchingJoin) return false;
  }

  if (c1.size() < 4) return false;

  Chromosome c1_old = c1;
  Chromosome c2_old = c2;

  c1 = cross(c1_old, c2_old); // let c1_old dominate and c2_old be passive
  c2 = cross(c2_old, c1_old); // let c2_old dominate and c1_old be passive
 
  return true;
}

/*  cross
 *  A helper function used in the crossover function. This is the function that
 *  actually performs the string manipluations on the given chromosomes. c1 is
 *  treated as the dominant chromosome, while c2 is the pasive chromosome. That
 *  is to say that c1 injects its genetic information into c2, and then we return
 *  the newly modified version of c2. Since we only return one chromosome, the
 *  cross() function must be called twice in crossover(), once when c1 is dominant
 *  and once when c2 is dominant.
 */
Chromosome cross(Chromosome c1, Chromosome c2)
{ 
  int gRandA;
  int gRandB;

  do // ensure that the numbers will not be equal
  {
	gRandA = randomInt(0, c1.size() - 1); // the random numbers determining which genes to switch (in terms of 
	gRandB = randomInt(0, c1.size() - 1); // their index numbers). These are an integers in [0, c.size() - 1]
  }
  while (gRandA == gRandB);

  unsigned int jNumA = c1.getGene(gRandA).getJoinNumber(); // the joinNumbers of the genes indexed
  unsigned int jNumB = c1.getGene(gRandB).getJoinNumber(); // respectively by gRandA and gRandB

  Chromosome tmp; 

  // copy into tmp all genes in c2 that have joinNumbers not equal to those of gRandA and gRandB in c1
  for (size_t i = 0; i < c2.size(); ++i)
  {
	if (c2.getGene(i).getJoinNumber() != jNumA && c2.getGene(i).getJoinNumber() != jNumB)
	{
	  tmp.addGene(c2.getGene(i));
	}
  }

  // now we have a chromosome with 2 fewer genes, where effectively the genes indexed by gRandA
  // and gRandB have been removed from c2, and all the remaining genes are scrunched leftward.
  // So now we simply need to insert the genes denoted by gRandA and gRandB in c1 into the two 
  // empty spaces at the end of tmp.
  tmp.addGene(c1.getGene(gRandA));
  tmp.addGene(c1.getGene(gRandB));

  return tmp;
}


/*  fitness
 *  Does some preparation to the chromosome (like assigning inner and outer relations, finding the root node
 *  of the plan tree etc) so that we can use the cost() function. It is important to note that since the
 *  fitness function is based on the cost of the chromosome/plan a lower fitness is better than a larger one
 */
double fitness(Chromosome c)
{  
  // go through the chromosome and properly assign the inner and outer relations for each gene
  for (size_t i = 0; i < c.size(); ++i)
  {
	  Gene g = c.getGene(i);
	  char rel1;
	  char rel2;

	  if (g.getJoinNumber() == 1) 
	  {
		  rel1 = 'A';
		  rel2 = 'B';
	  }
	  else // some other join number
	  {
		  rel1 = intToChar(g.getJoinNumber() - 1); // relies heavily on our right-deep assumptions
		  rel2 = char(65 + g.getJoinNumber()); // relies heavily on our right-deep assumptions 
	  }


	  // we now know what the relations are, we just need to look at the orientation
	  // to see what order they should be in
	  if (g.getOrientation() == 'a')
	  {
		  g.setOuterRel(rel1);
		  g.setInnerRel(rel2);
	  }
	  else // reverse orientation
	  {	
		  g.setOuterRel(rel2);
		  g.setInnerRel(rel1);
	  }

	  Chromosome tmp;
	  // copy all of c's genes into tmp--except for the gene indexed by i, in whose
	  // case we use the newly remodeled gene value
	  for (size_t j = 0; j < c.size(); ++j)
	  {
			if (j == i)
			{
			  //Gene g(jNum, Orien);
				tmp.addGene(g);
			}
			else
			{
				tmp.addGene(c.getGene(j)); 
			}
	  }
	  c = tmp;
  }

  // find the root join. This will be the join we want to send to 
  // the cost estimation function. Because of our initial right-deep
  // assumption, its just the gene with the highest joinNumber!

  int rootNum = 1; // the joinNumber of the "root" gene

  for (size_t i = 0; i < c.size(); ++i)
  {
	if (c.getGene(i).getJoinNumber() > (size_t)rootNum) rootNum = c.getGene(i).getJoinNumber();
  }

  // at this point, rootNum points to the join number of the root join, so we just
  // want to call the cost function on this join, and the cost function will take
  // care of recurring down through the join tree, calcluating the cost as it goes

  // note that the cost function must be passed a char, so any ints must be casted
  return cost(intToChar(rootNum), c);
}

/* cost
 * This is the most crucial part of the entire algorithm. I've decided to implement the cost
 * function recursively. In particular, the function is called on the root node of the plan 
 * tree and then it recurs through the subtrees, computing intermetidate costs as it goes.
 * The equation itself considers the number of tuple reads (the number of rows in a base table)
 * and the number of tuple reads (the number of rows produced from a given join). The actual
 * formaula is: cost(join) = R(outerRelation) * R(innerRelation) + W(join), where R is the 
 * number of tuple reads and W is the number of tuple writes.  Eventually, the equation will
 * recur down to the leaf-level, we'll get something like cost(join1) = R(A)*R(B) +W(join1)
 * where R(A) and R(B) are the number of tuples in the base relations A and B, respectively;
 * so having these indivisible base tables ensures we have a terminal basis for the recursion.
 * Having the signature of the function be a char might see odd at first, but this has 
 * consistently been the manner of representing relations (both tables and joins) in this
 * project; thus a numerical char corresponds to a join, and an alphabetical join refers to 
 * a base table. Finally, a few words must be said about materializing and pipeleing. In order
 * to add more variety to this cost function, it has been made to consider whether or not a 
 * join can be pipelined. Like most real-world optimizers, this can be determined if the join
 * is the outer (left) child of another join.  If this is the case, then we don't need to
 * materialize the result of the join (and so we don't need to add W(join) before we multiply, 
 * and so we reduce cost). Thus, the cost assigning algorithm checks to see if a join is 
 * able to be pipelined, and if so, it uses just an RR equation (instead of the RR+W equation).
 * By implication, this setup implies that a fully left-deep tree is going to be the most
 * optimal, because it will have all of its intermediate joins pipelined. This also is not
 * unlike real-world optimizers
 */
double cost(char rel, Chromosome c)
{
	vector<pair<char, double> > tableTups; // holds pairs of base table names and their associated number of tuples
	vector<pair<char, double> > joinTups; // holds pairs of join numbers and their associated number of resulting tuples

	ifstream inFile;

	inFile.open("relTups.dat");
	if (! inFile)
	{
		cout << "\n*ERROR: could not open the file: relTups.dat\n";
		exit(0);
	}
	else 
	{
		char relation;
		int unscaledTuples; // the values taken right out of the file
		double nTuples; // the values after scaling a bit (to avoid overflow)

		while (inFile.peek() != EOF)
	    {
			inFile >> relation;
			inFile >> unscaledTuples;
			nTuples = double(unscaledTuples) / 100; // resize number of tuples to avoid overflow
			if (nTuples == 0) nTuples = 1; // never zero out the number of tuples

			if (isalpha(relation)) // then we're still reading in base table information
			{
				tableTups.push_back(pair<char, double>(relation, nTuples));
			}	
			else // we've reached the join tuple information (relations represented as numbers)
			{
				joinTups.push_back(pair<char, double>(relation, nTuples));
			}
		}
	}	
	inFile.close();

	// so at this point, all of the tuple information should be in the vectors

	if (isalpha(rel)) // we know its a base table, and not a join
	{
		double Trel; // the number of tuples in rel (the base table)
	 
		// scan through the tableTups vector, looking for a pair.first equal to our rel
		for (size_t i = 0; i < tableTups.size(); ++i)
		{
			// we've found the relation we care about, so copy its # of tuples into Trel
		  if (tableTups[i].first == rel) Trel = tableTups[i].second;
		}
		return Trel;
	}
	else // we know its a join, so we need to get the gene for use in the more complex cost equation
	{
		double Trel; // the number of tuples that result from rel (the join)
		Gene g; // the gene with the joinNumber we care about
		c.findGene(charToInt(rel), g); // get the gene with the joinNumber we care about

		// scan through the joinTups vector, looking for a pair.first equal to our rel
		for (size_t i = 0; i < joinTups.size(); ++i)
		{
			// we've found the join we care about, so copy its # of tuples into Trel
			if (joinTups[i].first == rel) Trel = joinTups[i].second;
		}
		
		Gene parent;
		bool isLeftChild = false; // a flag that says whether or not rel is the left child of any node

		// if rel has a parent (i.e, rel is not the root) and rel is the outer (left) relation
		if (c.findGene(charToInt(rel + 1), parent) && parent.getOuterRel() == rel) isLeftChild = true;

		// if rel is a left child, it can be pipelined, so no need to add in Trel
		// else rel is not a left child, so we need to materialize the results.
		if (isLeftChild) return cost(g.getOuterRel(), c) * cost(g.getInnerRel(), c);
		else return cost(g.getOuterRel(), c) * cost(g.getInnerRel(), c) + Trel;
	}
}

//========================================== OPERATOR =============================================

/*
 *  Insertion operator: output all of the genes in the chromosome, separated by spaces
 */
ostream& operator << (ostream& outs, const Chromosome& c)
{
  for (size_t i = 0; i < c.size(); ++i)
  {
	outs << c.getGene(i) << " ";
  }
  return outs;
}
