/* 
 *  M_APM  -  mapmfact.c
 *
 *  Copyright (C) 1999 - 2007   Michael C. Ring
 *
 *  Permission to use, copy, and distribute this software and its
 *  documentation for any purpose with or without fee is hereby granted,
 *  provided that the above copyright notice appear in all copies and
 *  that both that copyright notice and this permission notice appear
 *  in supporting documentation.
 *
 *  Permission to modify the software is granted. Permission to distribute
 *  the modified code is granted. Modifications are to be distributed by
 *  using the file 'license.txt' as a template to modify the file header.
 *  'license.txt' is available in the official MAPM distribution.
 *
 *  This software is provided "as is" without express or implied warranty.
 */

/*
 *      $Id: mapmfact.c,v 1.11 2007/12/03 01:51:50 mike Exp $
 *
 *      This file contains the FACTORIAL function.
 *
 *      $Log: mapmfact.c,v $
 *      Revision 1.11  2007/12/03 01:51:50  mike
 *      Update license
 *
 *      Revision 1.10  2003/07/21 21:05:31  mike
 *      facilitate 'gcov' code coverage tool by making an array smaller
 *
 *      Revision 1.9  2002/11/03 21:27:28  mike
 *      Updated function parameters to use the modern style
 *
 *      Revision 1.8  2000/06/14 20:36:16  mike
 *      increase size of DOS array
 *
 *      Revision 1.7  2000/05/29 13:15:59  mike
 *      minor tweaks, fixed comment
 *
 *      Revision 1.6  2000/05/26 16:39:03  mike
 *      minor update to comments and code
 *
 *      Revision 1.5  2000/05/25 23:12:53  mike
 *      change 'nd' calculation
 *
 *      Revision 1.4  2000/05/25 22:17:45  mike
 *      implement new algorithm for speed. approx 5 - 10X
 *      faster on my PC when N! is large (> 6000)
 *
 *      Revision 1.3  1999/06/19 21:25:21  mike
 *      changed local static variables to MAPM stack variables
 *
 *      Revision 1.2  1999/05/23 18:21:12  mike
 *      minor variable name tweaks
 *
 *      Revision 1.1  1999/05/15 21:06:11  mike
 *      Initial revision
 */

/*
 *      Brief explanation of the factorial algorithm.
 *      ----------------------------------------------
 *
 *      The old algorithm simply multiplied N * (N-1) * (N-2) etc, until 
 *	the number counted down to '2'. So one term of the multiplication 
 *	kept getting bigger while multiplying by the next number in the 
 *	sequence. 
 *
 *      The new algorithm takes advantage of the fast multiplication 
 *	algorithm. The "ideal" setup for fast multiplication is when 
 *	both numbers have approx the same number of significant digits 
 *	and the number of digits is very near (but not over) an exact 
 *	power of 2.
 *
 *	So, we will multiply N * (N-1) * (N-2), etc until the number of
 *	significant digits is approx 256.
 *
 *	Store this temp product into an array.
 *
 *	Then we will multiply the next sequence until the number of
 *	significant digits is approx 256.
 *
 *	Store this temp product into the next element of the array.
 *
 *	Continue until we've counted down to 2.
 *
 *	We now have an array of numbers with approx the same number
 *	of digits (except for the last element, depending on where it 
 *	ended.) Now multiply each of the array elements together to
 *	get the final product.
 *
 *      The array multiplies are done as follows (assume we used 11
 *	array elements for this example, indicated by [0] - [10] ) :
 *
 *	initial    iter-1     iter-2       iter-3     iter-4
 *
 *	  [0] 
 *	     *  ->  [0]
 *	  [1]
 *                      * ->    [0]
 *
 *	  [2] 
 *	     *  ->  [1]
 *	  [3]
 *                                   * ->   [0] 
 *
 *	  [4] 
 *	     *  ->  [2]
 *	  [5]
 *
 *                      * ->    [1]
 *
 *	  [6] 
 *	     *  ->  [3]                           *  ->  [0]
 *	  [7]
 *
 *
 *	  [8] 
 *	     *  ->  [4]
 *	  [9]
 *                      * ->    [2]    ->   [1]
 *
 *
 *	  [10]  ->  [5]
 *
 */

#include "m_apm_lc.h"

/* define size of local array for temp storage */

#define NDIM 32

/****************************************************************************/
void	m_apm_factorial(M_APM moutput, M_APM minput)
{
int     ii, nmul, ndigits, nd, jj, kk, mm, ct;
M_APM   array[NDIM];
M_APM   iprod1, iprod2, tmp1, tmp2;

/* return 1 for any input <= 1 */

if (m_apm_compare(minput, MM_One) <= 0)
  {
   m_apm_copy(moutput, MM_One);
   return;
  }

ct       = 0;
mm       = NDIM - 2;
ndigits  = 256;
nd       = ndigits - 20;
tmp1     = m_apm_init();
tmp2     = m_apm_init();
iprod1   = m_apm_init();
iprod2   = m_apm_init();
array[0] = m_apm_init();

m_apm_copy(tmp2, minput);

/* loop until multiply count-down has reached '2' */

while (TRUE)
  {
   m_apm_copy(iprod1, MM_One);

   /* 
    *   loop until the number of significant digits in this 
    *   partial result is slightly less than 256
    */

   while (TRUE)
     {
      m_apm_multiply(iprod2, iprod1, tmp2);

      m_apm_subtract(tmp1, tmp2, MM_One);

      m_apm_multiply(iprod1, iprod2, tmp1);

      /*
       *  I know, I know.  There just isn't a *clean* way 
       *  to break out of 2 nested loops.
       */

      if (m_apm_compare(tmp1, MM_Two) <= 0)
        goto PHASE2;

      m_apm_subtract(tmp2, tmp1, MM_One);

      if (iprod1->m_apm_datalength > nd)
        break;
     }

   if (ct == (NDIM - 1))
     {
      /* 
       *    if the array has filled up, start multiplying
       *    some of the partial products now.
       */

      m_apm_copy(tmp1, array[mm]);
      m_apm_multiply(array[mm], iprod1, tmp1);

      if (mm == 0)
        {
         mm = NDIM - 2;
	 ndigits = ndigits << 1;
         nd = ndigits - 20;
	}
      else
         mm--;
     }
   else
     {
      /* 
       *    store this partial product in the array
       *    and allocate the next array element
       */

      m_apm_copy(array[ct], iprod1);
      array[++ct] = m_apm_init();
     }
  }

PHASE2:

m_apm_copy(array[ct], iprod1);

kk = ct;

while (kk != 0)
  {
   ii = 0;
   jj = 0;
   nmul = (kk + 1) >> 1;

   while (TRUE)
     {
      /* must use tmp var when ii,jj point to same element */

      if (ii == 0)
        {
         m_apm_copy(tmp1, array[ii]);
         m_apm_multiply(array[jj], tmp1, array[ii+1]);
        }
      else
         m_apm_multiply(array[jj], array[ii], array[ii+1]);

      if (++jj == nmul)
        break;

      ii += 2;
     }

   if ((kk & 1) == 0)
     {
      jj = kk >> 1;
      m_apm_copy(array[jj], array[kk]);
     }

   kk = kk >> 1;
  }

m_apm_copy(moutput, array[0]);

for (ii=0; ii <= ct; ii++)
  {
   m_apm_free(array[ii]);
  }

m_apm_free(tmp1);
m_apm_free(tmp2);
m_apm_free(iprod1);
m_apm_free(iprod2);
}
/****************************************************************************/