//=============================================================================
#include "movieIO.h"
#include "rgbImage.h"
#include "featureCorrelator.h"
#include "subWindow.h"
#include "percentBar.h"
#include "math.h"

extern "C" void svdcmp( float **a, int m, int n, float *w, float **v );
extern "C" void svbksb( float **u, float *w, float **v, int m, int n,
			float *b, float *x );
extern "C" void gaussj( float **a, int n, float **b, int m );

// C = A.B
#define MATMULT_3X3( A, B, C ) \
  (C)[0] = (A)[0]*(B)[0] + (A)[1]*(B)[3] + (A)[2]*(B)[6]; \
  (C)[1] = (A)[0]*(B)[1] + (A)[1]*(B)[4] + (A)[2]*(B)[7]; \
  (C)[2] = (A)[0]*(B)[2] + (A)[1]*(B)[5] + (A)[2]*(B)[8]; \
  (C)[3] = (A)[3]*(B)[0] + (A)[4]*(B)[3] + (A)[5]*(B)[6]; \
  (C)[4] = (A)[3]*(B)[1] + (A)[4]*(B)[4] + (A)[5]*(B)[7]; \
  (C)[5] = (A)[3]*(B)[2] + (A)[4]*(B)[5] + (A)[5]*(B)[8]; \
  (C)[6] = (A)[6]*(B)[0] + (A)[7]*(B)[3] + (A)[8]*(B)[6]; \
  (C)[7] = (A)[6]*(B)[1] + (A)[7]*(B)[4] + (A)[8]*(B)[7]; \
  (C)[8] = (A)[6]*(B)[2] + (A)[7]*(B)[5] + (A)[8]*(B)[8]
  
#define MATMULT_3X3b( A, B, c1, c2, c3 ) \
  (c1)[0] = (A)[0]*(B)[0] + (A)[1]*(B)[3] + (A)[2]*(B)[6]; \
  (c1)[1] = (A)[0]*(B)[1] + (A)[1]*(B)[4] + (A)[2]*(B)[7]; \
  (c1)[2] = (A)[0]*(B)[2] + (A)[1]*(B)[5] + (A)[2]*(B)[8]; \
  (c2)[0] = (A)[3]*(B)[0] + (A)[4]*(B)[3] + (A)[5]*(B)[6]; \
  (c2)[1] = (A)[3]*(B)[1] + (A)[4]*(B)[4] + (A)[5]*(B)[7]; \
  (c2)[2] = (A)[3]*(B)[2] + (A)[4]*(B)[5] + (A)[5]*(B)[8]; \
  (c3)[0] = (A)[6]*(B)[0] + (A)[7]*(B)[3] + (A)[8]*(B)[6]; \
  (c3)[1] = (A)[6]*(B)[1] + (A)[7]*(B)[4] + (A)[8]*(B)[7]; \
  (c3)[2] = (A)[6]*(B)[2] + (A)[7]*(B)[5] + (A)[8]*(B)[8]

// Determinant of a 3X3 matrix
#define DET_3X3(a1,a2,a3,b1,b2,b3,c1,c2,c3) \
  (  (a1)*((b2)*(c3)-(b3)*(c2)) \
    -(b1)*((a2)*(c3)-(a3)*(c2)) \
    +(c1)*((a2)*(b3)-(a3)*(b2)) )
    
#define DET_3X3b(A,B,C) \
  (  (A)[0] * ((B)[1]*(C)[2]-(B)[2]*(C)[1]) \
    -(B)[0] * ((A)[1]*(C)[2]-(A)[2]*(C)[1]) \
    +(C)[0] * ((A)[1]*(B)[2]-(A)[2]*(B)[1]) )

#define DET_3X3c(A) \
  DET_3X3b( (A)[0], (A)[3], (A)[6] )

// Determinant of a 4X4 matrix
#define DET_4X4(A,B,C,D) \
  (  (A)[0] * DET_3X3b( &(B)[1], &(C)[1], &(D)[1] ) \
    -(B)[0] * DET_3X3b( &(A)[1], &(C)[1], &(D)[1] ) \
    +(C)[0] * DET_3X3b( &(A)[1], &(B)[1], &(D)[1] ) \
    -(D)[0] * DET_3X3b( &(A)[1], &(B)[1], &(C)[1] ) )

#define DET_4X4b(A) DET_4X4( &(A)[0], &(A)[4], &(A)[8], &(A)[12] )

void computeM( const float *F, const float *e1, const float *e2,
	       float *P1, float *P2 )
{
  int i;
	  
  // P1 is set to be I
  for ( i=1; i<12; i++ )
    P1[i]=0.0f;
  P1[0] = 1.0f;
  P1[5] = 1.0f;
  P1[10] = 1.0f;

  // P2 = [ [e2]XF | e2 ]
  float e2XMat[3][3];
  e2XMat[0][0]= 0.0f;   e2XMat[0][1]=-e2[2];  e2XMat[0][2]= e2[1];
  e2XMat[1][0]= e2[2];  e2XMat[1][1]= 0.0f;   e2XMat[1][2]=-e2[0];
  e2XMat[2][0]=-e2[1];  e2XMat[2][1]= e2[0];  e2XMat[2][2]= 0.0f;

  MATMULT_3X3b( e2XMat[0], F, &P2[0], &P2[4], &P2[8] );
  P2[ 3] = e2[0];
  P2[ 7] = e2[1];
  P2[11] = e2[2];
}

void matrixPseudoInverse( const float *A, float *Res )
{
  //          +      T      T  -1
  // Res  =  A   =  A  (A  A  )
  int i,j,k;
  
  float comb[3][3];
  for ( i=0; i<3; i++ )
    for ( j=0; j<3; j++ )
  {
    comb[i][j] = A[i*4]*A[j*4];
    for ( k=1; k<4; k++ )
      comb[i][j] += A[i*4+k]*A[j*4+k];
  }

  float *c[4];
  for ( i=0; i<3; i++ )
    c[i+1] = comb[i]-1;

  if ( DET_3X3c(comb)!=0 )
    gaussj( c, 3, 0, 0 );
  
  for ( i=0; i<4; i++ )
    for ( j=0; j<3; j++ )
      Res[i*3+j] = A[i]*comb[0][j] + A[4+i]*comb[1][j] + A[8+i]*comb[2][j];
}

void computeMfromP( const float *P1,  const float *P2,
		    const float *P2p, const float *P3p,
		    float *P3 )
{
  int i,j,k;
  // P3 = P2.pseudoinv(P2p).P3p
  // P3 = pseudoinv(P2p).P2.P3p
  float invP2p[12], tmp[4][4];
  matrixPseudoInverse( P2p, invP2p );

  for ( i=0; i<4; i++ )
    for ( j=0; j<4; j++ )
  {
    tmp[i][j] = invP2p[i*3]*P2[j];
    for ( k=1; k<3; k++ )
      tmp[i][j] += invP2p[i*3+k]*P2[k*4+j];
  }
  
  for ( i=0; i<3; i++ )
    for ( j=0; j<4; j++ )
      P3[i*4+j] = P3p[i*4  ]*tmp[0][j] + P3p[i*4+1]*tmp[1][j] +
	          P3p[i*4+2]*tmp[2][j] + P3p[i*4+3]*tmp[3][j];
  
//   for ( i=0; i<3; i++ )
//   {
//     printf("| ");
//     for ( int j=0; j<4; j++ )
//       printf("%+6.4f ",P2[i*4+j] );
//     if (i==1)
//       printf("|   =   | ");
//     else
//       printf("|       | ");
//     for (  j=0; j<4; j++ )
//       printf("%+6.4f ",P3[i*4+j] );
//     printf("|\n");
//   }
//   printf("\n");
}

void computeFp( const float *F, float *Fp, float *e1, float *e2 )
{
  int i,j,k;
  
  float aBuf[3][3];
  float *a[4];
  for ( i=0; i<3; i++ )
  {
    a[i+1] = aBuf[i]-1;
    for ( j=0; j<3; j++ )
      aBuf[i][j] = F[i*3+j];
  }
  
  float vBuf[3][3];
  float *v[4];
  for ( i=0; i<3; i++ )
    v[i+1]=vBuf[i]-1;

  float w[4];

  svdcmp( a, 3, 3, w, v );

  w[3] = 0.0f;

  for ( i=0; i<3; i++ )
    for ( j=0; j<3; j++ )
  {
    Fp[i*3+j] = 0.0f;
    for ( k=0; k<3; k++ )
      Fp[i*3+j] += aBuf[i][k]*w[k+1]*vBuf[j][k];
  }

  for ( i=0; i<3; i++ )
    e1[i] = vBuf[i][2];
  
  for ( i=0; i<3; i++ )
    e2[i] = aBuf[i][2];
}

void computeFfromP( const float *P1, const float *P2, float *F )
{
  F[0] =  DET_4X4( &P1[4], &P1[8], &P2[4], &P2[8] );
  F[1] = -DET_4X4( &P1[0], &P1[8], &P2[4], &P2[8] );
  F[2] =  DET_4X4( &P1[0], &P1[4], &P2[4], &P2[8] );
  
  F[3] = -DET_4X4( &P1[4], &P1[8], &P2[0], &P2[8] );
  F[4] =  DET_4X4( &P1[0], &P1[8], &P2[0], &P2[8] );
  F[5] = -DET_4X4( &P1[0], &P1[4], &P2[0], &P2[8] );
  
  F[6] =  DET_4X4( &P1[4], &P1[8], &P2[0], &P2[4] );
  F[7] = -DET_4X4( &P1[0], &P1[8], &P2[0], &P2[4] );
  F[8] =  DET_4X4( &P1[0], &P1[4], &P2[0], &P2[4] );
}

void computeF( const float *F12, const float *F23, float *F13 )
{
  float F12p[9], F23p[9];
  float e1[3], e2[3], e2p[3], e3p[3];
  float P1[12], P2[12], P3[12], P1p[12], P2p[12], P3p[12];
  
  computeFp( F12, F12p, e1, e2 );
  computeM( F12p, e1, e2, P1, P2 );

  computeFp( F23, F23p, e2p, e3p );
  computeM( F23p, e2p, e3p, P2p, P3p );
  
  //  computeMfromP( P1, P2, P2p, P3p, P3 );
  //  computeFfromP( P1, P3, F13 );
  
  computeMfromP( P3p, P2p, P2, P1, P1p );
  computeFfromP( P1p, P3p, F13 );
  
  for ( int i=0; i<3; i++ )
  {
    printf("|");
    for ( int j=0; j<4; j++ )
      printf("%+5.3f ",P1[i*4+j] );
    printf("| |");
    for (  j=0; j<4; j++ )
      printf("%+5.3f ",P2[i*4+j] );
    printf("| |");
    for (  j=0; j<4; j++ )
      printf("%+5.3f ",P2p[i*4+j] );
    printf("| |");
    for (  j=0; j<4; j++ )
      printf("%+5.3f ",P3p[i*4+j] );
    printf("| |");
    for (  j=0; j<4; j++ )
      printf("%+5.3f ",P3[i*4+j] );
    printf("|\n");
  }
  printf("\n");
//   computeFp( F13, F12p, e1, e2 );
//   computeM( F12p, e1, e2, P1, P2 );
//   for ( i=0; i<3; i++ )
//   {
//     printf("|");
//     for ( int j=0; j<4; j++ )
//       printf("%+5.3f ",P1[i*4+j] );
//     printf("|     |");
//     for (  j=0; j<4; j++ )
//       printf("%+5.3f ",P2[i*4+j] );
//     printf("|\n");
//   }
  printf("\n");
  for ( i=0; i<3; i++ )
  {
    printf("| ");
    for ( int j=0; j<3; j++ )
      printf("%+6.4f ",F12p[i*3+j] );
    if (i==1)
      printf("|  \"+\"  | ");
    else
      printf("|       | ");
    for (  j=0; j<3; j++ )
      printf("%+6.4f ",F23p[i*3+j] );
    if (i==1)
      printf("|   =   | ");
    else
      printf("|       | ");
    for (  j=0; j<3; j++ )
      printf("%+6.4f ",F13[i*3+j] );
    printf("|\n");
  }
  printf("\n");
}

bool computeF( const vector<subWindowListAnim> &features,
	       int frame1, int frame2, int imgSizeX, int imgSizeY, float *F )
{
  float x1, y1, x2, y2, compX, compY, compW;
  vector<subWindowListAnim>::const_iterator iter;
  int i, j;
  
  int numFeatures=0;
  for ( iter = features.begin(); iter!=features.end(); iter++ )
  {
    if ( (iter->includesFrame(frame1)) &&
	 (iter->includesFrame(frame2)) )
    { // feature is present in both images
      numFeatures++;
    }
  }
  if (numFeatures<9) return false;
  //  printf("Using %i feature corespondences\n", numFeatures);

  float *pCBuf = (float*)malloc( numFeatures * 4 * sizeof(float) );
  float **pointCorresp = (float**)malloc( numFeatures * sizeof(float*) );
  for ( i=0; i<numFeatures; i++ )
    pointCorresp[i]=&pCBuf[i*4];
  for ( iter = features.begin(), i=0; iter!=features.end(); iter++ )
  {
    if ( (iter->includesFrame(frame1)) &&
	 (iter->includesFrame(frame2)) )
    { // feature is present in both images
      pointCorresp[i][0] = ((float)iter->getSubWindow(frame1).centerX()) /
	(float)imgSizeX * 4.0f - 2.0f;
      pointCorresp[i][1] = ((float)iter->getSubWindow(frame1).centerY()) /
	(float)imgSizeY * 4.0f - 2.0f;
      pointCorresp[i][2] = ((float)iter->getSubWindow(frame2).centerX()) /
	(float)imgSizeX * 4.0f - 2.0f;
      pointCorresp[i][3] = ((float)iter->getSubWindow(frame2).centerY()) /
	(float)imgSizeY * 4.0f - 2.0f;
      i++;
    }
  }
  
  float *aBuf = (float*)malloc( numFeatures * 9 * sizeof(float) );
  float **a = (float**)malloc( (numFeatures+1) * sizeof(float*) );
  for ( i=0; i<numFeatures; i++ )
    a[i+1] = &aBuf[i*9]-1;
  for ( i=0; i<numFeatures; i++ )
  {
    x1 = pointCorresp[i][0];
    y1 = pointCorresp[i][1];
    x2 = pointCorresp[i][2];
    y2 = pointCorresp[i][3];
    
    a[i+1][1] = x2*x1;
    a[i+1][2] = x2*y1;
    a[i+1][3] = x2;
    a[i+1][4] = y2*x1;
    a[i+1][5] = y2*y1;
    a[i+1][6] = y2;
    a[i+1][7] = x1;
    a[i+1][8] = y1;
    a[i+1][9] = 1.0f;
  }

  float vBuf[9][9];
  float *v[10];
  for ( i=0; i<9; i++ )
    v[i+1]=vBuf[i]-1;

  float w[10];

  svdcmp( a, numFeatures, 9, w, v );

  float f[3][3];

  for ( i=0; i<3; i++ )
    for ( j=0; j<3; j++ )
      F[i*3+j] = f[i][j]  = vBuf[8][i*3+j];

  //  printf("Fundamental matrix from SVD directly\n");
  for ( i=0; i<numFeatures; i++ )
  {
    x1 = pointCorresp[i][0];
    y1 = pointCorresp[i][1];
    x2 = pointCorresp[i][2];
    y2 = pointCorresp[i][3];
      
    compX = x1*f[0][0] + y1*f[1][0] + f[2][0];
    compY = x1*f[0][1] + y1*f[1][1] + f[2][1];
    compW = x1*f[0][2] + y1*f[1][2] + f[2][2];
    compX *= x2;
    compY *= y2;
    compW *= 1.0f;

    //    printf( "diff %i: (%5.2f, %5.2f, %5.2f)\n",
    //	    i, compX, compY, compW );
  }

  free(aBuf);
  free(a);
  free(pCBuf);
  free(pointCorresp);
  return true;
}

int main( int argc, char *argv[] )
{
  int width, height, length;
  int i,j,k;
  vector<subWindowListAnim> features;
  if ( argc>=2 )
  {
    movieIO movie;
    printf( "Preparing \"%s\"...\n", argv[1] );
    if ( !movie.open( argv[1] ) )
    {
      printf( "Error in preparing %s\n", argv[1] );
      exit(0);
    }
    width=movie.getWidth();
    height=movie.getHeight();
    length=movie.getHeight();

    printf("\n");
    featureCorrelator corl( "Initializing correlator: ", movie, true );
    printf("\n");
    features = corl.buildTrackedFeatureList( "Tracking Features: ", 10, 5 );
    printf("\n");
  }
  else
  {
    printf("Loading previously tracked feature data...\n");
    #define FEATURE_LIST_NAME features
    #define WIDTH_VAR width
    #define HEIGHT_VAR height
    #define LENGTH_VAR length
    #include "track1.out"
    printf("Loaded %i tracked features\n", features.size() );
  }

  float **Fbuf = (float**)malloc( length*length*sizeof(float*) );
  for ( i=length*length-1; i>=0; i-- )
    Fbuf[i] = NULL;
  float ***F = (float***)malloc( length*sizeof(float*) );
  for ( i=length-1; i>=0; i-- )
    F[i] = &Fbuf[i*length];

  float *Ftemp = (float*)malloc( 9*sizeof(float) );
  int steps;
  int *step = (int*)malloc(length*sizeof(int));
  bool found;
  
  step[0]=0;
  //  for ( i=length-1; i>0; i-- )
  for ( i=1; i<length; i++ )
  {
    found=false;
    for ( steps=1; (steps<=i) && (!found); steps++ )
    {
      step[steps]=i;
      for ( j=0; (j<i-steps+1) && ((steps>1)||(j==0)); j++ )
      {
	for ( k=1; k<steps; k++ )
	{
	  step[k] = k*i/steps + ( ((j/k)%2)==0 ? -j/steps : j/steps+1 );
	  if ( step[k]<=step[k-1]) break;
	}
	if ( (k<steps) || (step[k-1]>=i) ) continue;
	for ( k=0; k<steps; k++ )
	{
	  if ( F[step[k]][step[k+1]]==NULL )
	  {
	    if ( computeF( features, step[k], step[k+1],
			   width, height, Ftemp) )
	    {
	      F[step[k]][step[k+1]] = Ftemp;
	      Ftemp = (float*)malloc( 9*sizeof(float) );
	    }
	    else
	    {
	      break;
	    }
	  }
	}
	if ( k>=steps )
	{
	  found=true;
	  printf( "Found: (%i steps)", steps );
	  for ( k=0; k<=steps; k++ ) printf("%3i", step[k]);
	  printf("\n");
	  break;
	}
      }
      if ( found ) break;
    }
    if ( !found )
    {
      printf( "ERROR: Couldn't find fundamental matrices spanning from %i to %i\n",
	      0, i );
      break;
    }
    for ( k=2; k<=steps; k++ )
    {
      if ( F[0][step[k]]==NULL )
      {
 	F[0][step[k]] = (float*)malloc( 9*sizeof(float) );
	computeF( F[0][step[k-1]], F[step[k-1]][step[k]], F[0][step[k]] );
      }
    }
  }

  float Fp[9], e1[3], e2[3];
  for ( i=1; i<length; i++ )
  {
    computeFp( F[0][i], Fp, e1, e2 );
    e2[0] = (e2[0]/e2[2] + 2.0f) / 4.0f * width;
    e2[1] = (e2[1]/e2[2] + 2.0f) / 4.0f * height;
    printf("Camera center at frame %2i: (%3i, %3i, %7.4f)\n",
	   i, (int)e2[0], (int)e2[1], e2[2] );
  }
    
    
//     if ( computeF( features, 0, i, width, height, Ftemp ) )
//     {
//       F[0][i] = Ftemp;
//       Ftemp = (float*)malloc( 9*sizeof(float) );
//     }
//     else
//     {
      
//     }
  

//   percentBar bar(" Adding squares: " );
//   vector<subWindowListAnim>::iterator iter = features.begin();
//   unsigned char colour[3];
//   float curr=0.0f, end=features.size();
//   for ( ; iter!=features.end(); iter++ )
//   {
//     colour[0] = rand() % 256;
//     colour[1] = rand() % 256;
//     colour[2] = rand() % 256;
//     for ( int srcFrame=iter->getInterval().getEnd();
// 	  srcFrame>=iter->getInterval().getStart(); srcFrame-- )
//     {
//       rgbBuf = movie.getRGBFrame(srcFrame);
//       rgbBuf->addSquare( (*iter)[srcFrame], colour[0], colour[1], colour[2],
// 			 1, true );
//       movie.setRGBFrame( srcFrame, rgbBuf );
//       movie.releaseRGBFrame( rgbBuf );
//     }
//     bar.updatePercent( (curr+=1.0f)/end );
//   }
//   bar.endProgressBar();
  
//   printf("\n\n");
//   movie.play();
//   movie.close();
}
