gnn_lmbfgs.c Source File

00001 /***************************************************************************
00002  *  @file gnn_lmbfgs.c
00003  *  @brief gnn_lmbfgs Implementation.
00004  *
00005  *  @date   : 05-10-03 01:05
00006  *  @author : Pedro Ortega C. <peortega@dcc.uchile.cl>
00007  *  Copyright  2003  Pedro Ortega C.
00008  ****************************************************************************/
00009 /*
00010  *  This program is free software; you can redistribute it and/or modify
00011  *  it under the terms of the GNU General Public License as published by
00012  *  the Free Software Foundation; either version 2 of the License, or
00013  *  (at your option) any later version.
00014  *
00015  *  This program is distributed in the hope that it will be useful,
00016  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  *  GNU Library General Public License for more details.
00019  *
00020  *  You should have received a copy of the GNU General Public License
00021  *  along with this program; if not, write to the Free Software
00022  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00023  */
00024 
00025 
00026 
00027 /**
00028  * @defgroup gnn_lmbfgs_doc gnn_lmbfgs : Limited Memory BFGS Algorithm.
00029  * @ingroup gnn_trainer_doc
00030  *
00031  * This trainer implements the Limited Memory Broyden-Fletcher-Goldfarb-Shanno
00032  * (LMBFGS) optimization algorithm. It is a so-called \em Quasi-Newton
00033  * method. The parameters are updated using the formula:
00034  *
00035  * \f[ w^{(\tau + 1)} = w^{(\tau)} + \alpha^{\tau} d^{(\tau)} \f]
00036  *
00037  * where \f$d^{(\tau)}\f$ is a search direction given by:
00038  *
00039  * \f[ d^{(\tau+1)} = -g^{(\tau+1)} + Ap + Bp \f]
00040  *
00041  * and where the following vectors have been defined:
00042  *
00043  * \f[ p = w^{(\tau + 1)} - w^{(\tau)}                  \f]
00044  * \f[ v = g^{(\tau + 1)} - g^{(\tau)}                  \f]
00045  *
00046  * and \f$g^{(\tau)}\f$ is the \f$\tau\f$-th gradient
00047  * \f$\frac{\partial E}{\partial w}\f$. The step size \f$\alpha^{(\tau)}\f$
00048  * is found using a line-minimization, which can be one of those available
00049  * in \ref gnn_line_search. The coefficients \f$A\f$ and \f$B\f$ are defined
00050  * as
00051  *
00052  * \f[ A = -\left ( 1 + \frac{ v^T v }{ p^T v } \right )
00053  *             \frac{ p^T g^{(\tau + 1)} }{ p^T v }
00054  *         + \frac{ v^T g^{(\tau + 1)} }{ p^T v }
00055  * \f]
00056  * \f[ B = \frac{ p^T g^{(\tau + 1)} }{ p^T v } \f]
00057  *
00058  * The difference with the BFGS method lies in the fact that the expensive
00059  * approximation and storage of the inverse Hessian matrix has been replaced
00060  * by the unit matrix. The resulting algorithm takes only \f$O(l)\f$ space,
00061  * where \f$l\f$ is the number of parameters of the Gradient Machine. The
00062  * algorithm is comparable to the conjugate gradients algorithm
00063  * (refer to \ref gnn_conjugate_gradient).
00064  */
00065 
00066 
00067 
00068 /******************************************/
00069 /* Include Files                          */
00070 /******************************************/
00071 
00072 #include <math.h>
00073 #include <gsl/gsl_blas.h>
00074 #include "gnn_utilities.h"
00075 #include "gnn_lmbfgs.h"
00076 
00077 
00078 
00079 /******************************************/
00080 /* Static Declaration                     */
00081 /******************************************/
00082 
00083 static int
00084 gnn_lmbfgs_reset (gnn_trainer *trainer);
00085 
00086 static int
00087 gnn_lmbfgs_iteration (gnn_lmbfgs *bf, double *A, double *B);
00088 
00089 static int
00090 gnn_lmbfgs_train (gnn_trainer *trainer);
00091 
00092 static void
00093 gnn_lmbfgs_destroy (gnn_trainer *trainer);
00094 
00095 
00096 
00097 
00098 /******************************************/
00099 /* Static Implementation                  */
00100 /******************************************/
00101 
00102 /**
00103  * @brief The trainer's "reset" implementation.
00104  * @ingroup gnn_lmbfgs_doc
00105  *
00106  * @param  trainer A pointer to a \ref gnn_lmbfgs.
00107  * @return Returns 0 if suceeded.
00108  */
00109 static int
00110 gnn_lmbfgs_reset (gnn_trainer *trainer)
00111 {
00112     gnn_lmbfgs *bf;
00113 
00114     assert (trainer != NULL);
00115 
00116     bf = (gnn_lmbfgs *) trainer;
00117 
00118     /* reset iteration counter */
00119     bf->iteration = 0;
00120 
00121     /* clear optimization information */
00122     gsl_vector_set_zero (bf->wnew);
00123     gsl_vector_set_zero (bf->wold);
00124     gsl_vector_set_zero (bf->gnew);
00125     gsl_vector_set_zero (bf->gold);
00126     gsl_vector_set_zero (bf->v);
00127     gsl_vector_set_zero (bf->p);
00128 
00129     return 0;
00130 }
00131 
00132 /**
00133  * @brief Computes the A and B coefficients.
00134  * @ingroup gnn_lmbfgs_doc
00135  *
00136  * @param  trainer A pointer to a \ref gnn_lmbfgs.
00137  * @param  A       A pointer where to store the \f$A\f$ coefficient.
00138  * @param  B       A pointer where to store the \f$B\f$ coefficient.
00139  * @return Returns 0 if suceeded.
00140  */
00141 static int
00142 gnn_lmbfgs_iteration (gnn_lmbfgs *bf, double *A, double *B)
00143 {
00144     double pTv;
00145     double pTg;
00146     double vTg;
00147     double vTv;
00148     
00149     /* compute p vector */
00150     gsl_vector_memcpy (bf->p, bf->wnew);
00151     gsl_vector_sub (bf->p, bf->wold);
00152 
00153     /* compute v vector */
00154     gsl_vector_memcpy (bf->v, bf->gnew);
00155     gsl_vector_sub (bf->v, bf->gold);
00156 
00157     /* compute pTv and its inverse */
00158     gsl_blas_ddot (bf->p, bf->v, &pTv);
00159     pTv = GNN_MAX (pTv, GNN_TINY);
00160 
00161     /* compute vTv */
00162     gsl_blas_ddot (bf->v, bf->v, &vTv);
00163 
00164     /* compute pTg */
00165     gsl_blas_ddot (bf->p, bf->gnew, &vTv);
00166 
00167     /* compute vTg */
00168     gsl_blas_ddot (bf->v, bf->gnew, &vTv);
00169 
00170     /* compute coefficients */
00171     *A = -(1.0 + vTv / pTv) * pTg / pTv + vTg / pTv;
00172     *B =  pTg / pTv;
00173 
00174     return 0;
00175 }
00176 
00177 /**
00178  * @brief The trainer's "train" implementation.
00179  * @ingroup gnn_lmbfgs_descent_doc
00180  *
00181  * @param  trainer A pointer to a \ref gnn_lmbfgs.
00182  * @return Returns 0 if succeeded.
00183  */
00184 static int
00185 gnn_lmbfgs_train (gnn_trainer *trainer)
00186 {
00187     double A;
00188     double B;
00189     double alpha;
00190 
00191     double ax, bx, cx;
00192     double fa, fb, fc;
00193 
00194     size_t s, n;
00195     gnn_lmbfgs *bf;
00196 
00197     /* get view */
00198     bf = (gnn_lmbfgs *) trainer;
00199 
00200     /* process minibatch */
00201     gnn_trainer_batch_process (trainer);
00202 
00203     /* copy gradient */
00204     gsl_vector_memcpy (bf->gnew, gnn_trainer_batch_get_dw (trainer));
00205 
00206     /* copy current parameter vector */
00207     gnn_node_param_get (trainer->node, bf->wnew);
00208 
00209     /* check if the algorithm should be restarted */
00210     if (bf->iteration % bf->restart == 0)
00211     {
00212         /* only take the negative gradient search direction at start */
00213         A = 0.0;
00214         B = 0.0;
00215     }
00216     else
00217     {
00218         /* compute new search direction */
00219         gnn_lmbfgs_iteration (bf, &A, &B);
00220     }
00221     
00222     /* prepare for next iteration */
00223     gsl_vector_memcpy (bf->wold, bf->wnew);
00224     gsl_vector_memcpy (bf->gold, bf->gnew);
00225 
00226     /* build new search direction */
00227     gsl_vector_memcpy (bf->line->d, bf->gnew);
00228     gsl_vector_scale (bf->line->d, -1.0);
00229     gsl_blas_daxpy (A, bf->p, bf->line->d);
00230     gsl_blas_daxpy (B, bf->v, bf->line->d);
00231 
00232     /* perform line search over the current minibatch's patterns */
00233     ax = 0.0;
00234     bx = bf->step;
00235     s = gnn_trainer_get_pattern_index (trainer);
00236     n = gnn_trainer_batch_get_size (trainer);
00237 
00238     gnn_line_search_bracket (bf->line, s, n, &ax, &bx, &cx, &fa, &fb, &fc);
00239     bf->alpha (bf->line, s, n, ax, bx, cx, &alpha, bf->tol);
00240 
00241     /* build new origin */
00242     gsl_vector_scale (bf->line->d, alpha);
00243     gsl_vector_add (bf->line->w, bf->line->d);
00244 
00245     /* update parameters */
00246     gnn_node_param_set (trainer->node, bf->line->w);
00247 
00248     /* move to next minibatch */
00249     gnn_trainer_batch_next (trainer);
00250 
00251     /* update iteration counter */
00252     bf->iteration++;
00253 
00254     return 0;
00255 }
00256 
00257 /**
00258  * @brief The trainers "destroy" implementation.
00259  * @ingroup gnn_lmbfgs_doc
00260  *
00261  * @param trainer A pointer to a \ref gnn_lmbfgs.
00262  */
00263 static void
00264 gnn_lmbfgs_destroy (gnn_trainer *trainer)
00265 {
00266     gnn_lmbfgs *bf;
00267 
00268     assert (trainer != NULL);
00269 
00270     bf = (gnn_lmbfgs *) trainer;
00271 
00272     if (bf->wnew != NULL)
00273         gsl_vector_free (bf->wnew);
00274     if (bf->wold != NULL)
00275         gsl_vector_free (bf->wold);
00276     if (bf->gnew != NULL)
00277         gsl_vector_free (bf->gnew);
00278     if (bf->gold != NULL)
00279         gsl_vector_free (bf->gold);
00280     if (bf->v != NULL)
00281         gsl_vector_free (bf->v);
00282     if (bf->p != NULL)
00283         gsl_vector_free (bf->p);
00284     if (bf->line != NULL)
00285         gnn_line_destroy (bf->line);
00286 
00287     return;
00288 }
00289 
00290 
00291 
00292 /******************************************/
00293 /* Public Interface                       */
00294 /******************************************/
00295 
00296 /**
00297  * @brief Creates a new LMBFGS trainer.
00298  * @ingroup gnn_lmbfgs_doc
00299  *
00300  * This function creates a new LMBFGS trainer (\ref gnn_lmbfgs).
00301  *
00302  * @param  node A pointer to a \ref gnn_node.
00303  * @param  crit A pointer to a \ref gnn_criterion.
00304  * @param  data A pointer to a \ref gnn_dataset.
00305  * @return Returns a pointer to a new \ref gnn_lmbfgs trainer.
00306  */
00307 gnn_trainer *
00308 gnn_lmbfgs_new (gnn_node *node, gnn_criterion *crit, gnn_dataset *data)
00309 {
00310     int status;
00311     size_t l;
00312     gnn_trainer *trainer;
00313     gnn_lmbfgs *bf;
00314 
00315     /* allocate memory for the trainer */
00316     bf = (gnn_lmbfgs *) malloc (sizeof (gnn_lmbfgs));
00317     if (bf == NULL)
00318     {
00319         GSL_ERROR_VAL ("couldn't allocate memory for gnn_lmbfgs",
00320                        GSL_ENOMEM, NULL);
00321     }
00322 
00323     /* get view as gnn_trainer */
00324     trainer = (gnn_trainer *) bf;
00325 
00326     /* initialize */
00327     status = gnn_trainer_init (trainer,
00328                                "gnn_lmbfgs",
00329                                node,
00330                                crit,
00331                                data,
00332                                gnn_lmbfgs_reset,
00333                                gnn_lmbfgs_train,
00334                                gnn_lmbfgs_destroy);
00335     if (status)
00336     {
00337         GSL_ERROR_VAL ("couldn't initialize gnn_lmbfgs",
00338                        GSL_EFAILED, NULL);
00339     }
00340 
00341     /* set fields */
00342     bf->step      = GNN_LMBFGS_STEP;
00343     bf->tol       = GNN_LMBFGS_TOL;
00344     bf->iteration = 0;
00345     bf->restart   = GNN_LMBFGS_RESTART;
00346 
00347     bf->alpha = GNN_LMBFGS_ALPHA;
00348 
00349     /* allocate memory for all needed buffers */
00350     l = gnn_node_param_get_size (node);
00351     bf->wnew = gsl_vector_alloc (l);
00352     bf->wold = gsl_vector_alloc (l);
00353     bf->gnew = gsl_vector_alloc (l);
00354     bf->gold = gsl_vector_alloc (l);
00355     bf->v    = gsl_vector_alloc (l);
00356     bf->p    = gsl_vector_alloc (l);
00357     bf->line = gnn_line_new (trainer->grad, NULL);
00358 
00359     if (   bf->wnew == NULL
00360         || bf->wold == NULL
00361         || bf->gnew == NULL
00362         || bf->gold == NULL
00363         || bf->v    == NULL
00364         || bf->p    == NULL
00365         || bf->line == NULL )
00366     {
00367         gnn_trainer_destroy (trainer);
00368         GSL_ERROR_VAL ("couldn't allocate memory for gnn_lmbfgs",
00369                        GSL_ENOMEM, NULL);
00370     }
00371 
00372     return trainer;
00373 }
00374 
00375 
00376 
00377 /**
00378  * @brief Sets the precision tolerance for the line search procedure.
00379  * @ingroup gnn_lmbfgs_doc
00380  *
00381  * @param trainer A pointer to a \ref gnn_lmbfgs.
00382  * @param tol A stricly positive real value.
00383  * @return Returns 0 if succeeded.
00384  */
00385 int
00386 gnn_lmbfgs_set_tol (gnn_trainer *trainer, double tol)
00387 {
00388     gnn_lmbfgs *bf;
00389 
00390     assert (trainer != NULL);
00391 
00392     /* check value */
00393     if (tol <= 0.0)
00394     {
00395         GSL_ERROR ("tolerance should be stricly greater than zero",
00396                    GSL_EINVAL);
00397     }
00398 
00399     /* set value */
00400     bf = (gnn_lmbfgs *) trainer;
00401     bf->tol = tol;
00402 
00403     return 0;
00404 }
00405 
00406 /**
00407  * @brief Gets the tolerance for the line search procedure.
00408  * @ingroup gnn_lmbfgs_doc
00409  *
00410  * @param trainer A pointer to a \ref gnn_lmbfgs.
00411  * @return Returns the tolerance's value.
00412  */
00413 double
00414 gnn_lmbfgs_get_tol (gnn_trainer *trainer)
00415 {
00416     gnn_lmbfgs *bf;
00417 
00418     assert (trainer != NULL);
00419 
00420     bf = (gnn_lmbfgs *) trainer;
00421 
00422     return bf->tol;
00423 }
00424 
00425 /**
00426  * @brief Sets the initial step for the interval bracketing procedure.
00427  * @ingroup gnn_lmbfgs_doc
00428  *
00429  * @param trainer A pointer to a \ref gnn_lmbfgs.
00430  * @param step A stricly positive real value.
00431  * @return Returns 0 if succeeded.
00432  */
00433 int
00434 gnn_lmbfgs_set_step (gnn_trainer *trainer, double step)
00435 {
00436     gnn_lmbfgs *bf;
00437 
00438     assert (trainer != NULL);
00439 
00440     /* check value */
00441     if (step <= 0.0)
00442     {
00443         GSL_ERROR ("step should be stricly greater than zero",
00444                    GSL_EINVAL);
00445     }
00446 
00447     /* set value */
00448     bf = (gnn_lmbfgs *) trainer;
00449     bf->step = step;
00450 
00451     return 0;
00452 }
00453 
00454 /**
00455  * @brief Gets the initial step for the interval bracketing procedure.
00456  * @ingroup gnn_lmbfgs_doc
00457  *
00458  * @param trainer A pointer to a \ref gnn_lmbfgs.
00459  * @return The trainer's internal step.
00460  */
00461 double
00462 gnn_lmbfgs_get_step (gnn_trainer *trainer)
00463 {
00464     gnn_lmbfgs *bf;
00465 
00466     assert (trainer != NULL);
00467 
00468     bf = (gnn_lmbfgs *) trainer;
00469 
00470     return bf->step;
00471 }
00472 
00473 /**
00474  * @brief Sets the number of iterations before restarting.
00475  * @ingroup gnn_lmbfgs_doc
00476  *
00477  * @param trainer A pointer to a \ref gnn_lmbfgs.
00478  * @param restart A stricly positive integer.
00479  * @return Returns 0 if succeeded.
00480  */
00481 int
00482 gnn_lmbfgs_set_restart (gnn_trainer *trainer, size_t restart)
00483 {
00484     gnn_lmbfgs *bf;
00485 
00486     assert (trainer != NULL);
00487 
00488     /* check value */
00489     if (restart <= 0.0)
00490     {
00491         GSL_ERROR ("restart iteration should be stricly greater than zero",
00492                    GSL_EINVAL);
00493     }
00494 
00495     /* set value */
00496     bf = (gnn_lmbfgs *) trainer;
00497     bf->restart = restart;
00498 
00499     return 0;
00500 }
00501 
00502 /**
00503  * @brief Gets the number of iterations before reinitializing the direction.
00504  * @ingroup gnn_lmbfgs_doc
00505  *
00506  * This function returns the number of iterations executed by the LMBFGS
00507  * trainer before reinitializing the search direction.
00508  *
00509  * @param trainer A pointer to a \ref gnn_lmbfgs.
00510  * @return Returns the number of iterations.
00511  */
00512 size_t
00513 gnn_lmbfgs_get_restart (gnn_trainer *trainer)
00514 {
00515     gnn_lmbfgs *bf;
00516 
00517     assert (trainer != NULL);
00518 
00519     bf = (gnn_lmbfgs *) trainer;
00520 
00521     return bf->restart;
00522 }
00523 
00524 /**
00525  * @brief Sets the line search procedure.
00526  * @ingroup gnn_lmbfgs_doc
00527  *
00528  * This function sets a new line search procedure used by the LMBFGS trainer.
00529  *
00530  * \code
00531  * gnn_trainer *trainer;
00532  * trainer = gnn_lmbfgs_new (node, crit, data);
00533  *
00534  * // use the Golden-Section line search
00535  * gnn_lmbfgs_set_line_search (trainer, gnn_line_search_golden);
00536  * \endcode
00537  *
00538  * Please refer to (\ref gnn_line_search_doc) for the available line search
00539  * procedures.
00540  *
00541  * @param trainer A pointer to a \ref gnn_lmbfgs.
00542  * @param lsearch A pointer to a line search procedure.
00543  * @return Returns 0 if succeeded.
00544  */
00545 int
00546 gnn_lmbfgs_set_line_search (gnn_trainer *trainer, gnn_line_search_type lsearch)
00547 {
00548     gnn_lmbfgs *bf;
00549 
00550     assert (trainer != NULL);
00551     assert (lsearch != NULL);
00552 
00553     /* set value */
00554     bf = (gnn_lmbfgs *) trainer;
00555     bf->alpha = lsearch;
00556 
00557     return 0;
00558 }
00559 
00560 /**
00561  * @brief Gets the installed line search procedure.
00562  * @ingroup gnn_lmbfgs_doc
00563  *
00564  * @param trainer A pointer to a \ref gnn_lmbfgs.
00565  * @return Returns a pointer to the installed line-search procedure.
00566  */
00567 gnn_line_search_type
00568 gnn_lmbfgs_get_alpha (gnn_trainer *trainer)
00569 {
00570     gnn_lmbfgs *bf;
00571 
00572     assert (trainer != NULL);
00573 
00574     bf = (gnn_lmbfgs *) trainer;
00575 
00576     return bf->alpha;
00577 }
00578 
00579 
00580