/***************************************************************************
 * @file gnn_evaluation.c
 * @brief Evaluation Implementation.
 *
 * @date   : 16-09-03 23:47
 * @author : Pedro Ortega C. <peortega@dcc.uchile.cl>
 * Copyright 2003 Pedro Ortega C.
 ****************************************************************************/
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/**
 * @brief High-Level \ref gnn_node evaluation interface.
 * @defgroup gnn_evaluation_doc Evaluations.
 * @ingroup libgnn
 *
 * The \ref gnn_node structure, taken as it is, provides a very low-level
 * implementation of gradient models. Its programming interface is very
 * versatile, but for simpler problems it can be hard to use.
 *
 * The present module provides a high-level programming interface, in which
 * the three most frequent tasks have been packed into complex but
 * easy-to-use routines.
 *
 * The API provides routines for the following tasks:
 * - Evaluation of input patterns: Given the model \f$F(x,w)\f$ implemented
 *   by a particular \ref gnn_node, take a set of input vectors
 *   \f$X = \{x^p \in \mathbb{R}^n, p=0,1,\ldots,P-1\}\f$,
 *   and evaluate all (or some) outputs \f$y^p=F(x^p, \overline{w})\f$
 *   for a fixed parameter vector \f$\overline{w}\f$.
 * - Evaluation of the error and its gradients: Given the model \f$F(x,w)\f$
 *   implemented by a particular \ref gnn_node, an error criterion \f$E(y,t)\f$
 *   implemented by a \ref gnn_criterion, and a pattern set (or a subset of it)
 *   \f$\{(x^k, t^k, p^k) \mid k=0,\ldots,P-1\}\f$, evaluate the averaged error
 *   \f$<E>\f$, the gradient \f$<\frac{\partial E}{\partial x}>\f$ and the
 *   gradient \f$<\frac{\partial E}{\partial w}>\f$.
 * - Line evaluation procedures: Consider the one-dimensional function
 *   \f$g(\alpha) = F(x, w_0 + \alpha d)\f$, where \f$w_0\f$ is a vector
 *   called the \em origin, \f$d\f$ is a vector called the directional vector,
 *   and \f$\alpha\f$ is a scalar. Then, compute the averaged error
 *   \f$<E>\f$ and the derivative along the given line
 *   \f$<\frac{d E}{d \alpha}>
 *   = <\frac{\partial E}{\partial w} \frac{\partial w}{\partial \alpha}>\f$.
 *
 * All the previous evaluation tasks need a special structure containing
 * the buffers required to perform the evaluations. There are three types
 * of buffers:
 * - \ref gnn_eval : A buffer for computing the output evaluations.
 * - \ref gnn_grad : A buffer for computing the error and gradients.
 * - \ref gnn_line : A buffer for computing the line evaluation procedures.
 *
 * Before calling the functions, you should first create an appropriate
 * buffer. This is done by calling \ref gnn_eval_new for outputs,
 * \ref gnn_grad_new for gradients and \ref gnn_line_new for line evaluations.
 * These buffers are then given to the evaluation functions. After using
 * them, you should free the buffers with \ref gnn_eval_destroy,
 * \ref gnn_grad_destroy or \ref gnn_line_destroy, respectively.
 *
 * Usually, you call an evaluation function with the needed buffer, and
 * afterwards access the buffer's fields through the provided macros and
 * functions in order to obtain the results.
 *
 * <b> These three types of buffers do not own the associated \ref gnn_node,
 * \ref gnn_dataset, \ref gnn_criterion, or any other structure used to
 * build them. When a buffer is destroyed, these objects are left intact
 * and have to be destroyed separately. </b>
 *
 *
 *
 * \section gnn_eval_use Using gnn_eval.
 *
 * The \ref gnn_eval evaluation interface takes a given model and a set
 * of inputs, and computes the corresponding outputs.
 *
 * Example use:
 \code
 gnn_node   *node;
 gnn_input  *input;
 gnn_output *output;
 gnn_eval   *eval;

 // Create the needed objects.
 node   = my_new_custom_node ();
 input  = my_new_custom_input ();
 output = my_new_custom_output ();

 // Allocate the evaluation buffer.
 eval = gnn_eval_new (node, input, output);

 // Compute all estimated outputs: the gnn_node evaluates each
 // input example and puts the results into the gnn_output.
 gnn_eval_all (eval);

 // Now, "output" contains the results. Do something with them.
 ...

 // Destroy buffer and objects.
 gnn_eval_destroy (eval);
 gnn_node_destroy (node);
 gnn_input_destroy (input);
 gnn_output_destroy (output);

 \endcode
 *
 *
 *
 * \section gnn_grad_use Using gnn_grad.
 *
 * The \ref gnn_grad evaluation interface takes a given model, a criterion and
 * a dataset, and computes the weighted average of the errors \f$<E>\f$,
 * the weighted average of the gradient with respect to the inputs
 * \f$<\frac{\partial E}{\partial x}>\f$,
 * or the weighted average of the gradients with respect to the parameters
 * \f$<\frac{\partial E}{\partial w}>\f$.
 *
 * When calling one of the evaluation routines (that is, either by calling
 * \ref gnn_grad_pats or \ref gnn_grad_all), you have to specify how
 * far the computation should be performed. That's the purpose of the last
 * flag parameter:
 *
 * - gnnGradE  : Compute only the weighted average of the errors \f$<E>\f$.
 * - gnnGradDx : Compute \f$<E>\f$ \b and \f$<\frac{\partial E}{\partial x}>\f$.
 * - gnnGradDw : Compute \f$<E>\f$, \f$<\frac{\partial E}{\partial x}>\f$ and
 *   \f$<\frac{\partial E}{\partial w}>\f$.
 *
 * The values that haven't been computed shouldn't be accessed, because
 * their values are undefined.
 *
 * Example use:
 \code
 double E;
 gsl_vector *Dx;

 gnn_node      *node;
 gnn_criterion *crit;
 gnn_dataset   *data;
 gnn_grad      *grad;

 // Create the needed objects.
 node = my_new_custom_node ();
 crit = my_new_custom_criterion ();
 data = my_new_custom_dataset ();

 // Allocate the gradient evaluation buffer.
 grad = gnn_grad_new (node, crit, data);

 // In this example, we need to compute the gradient with respect
 // to the inputs. So we will execute the evaluation routine with the
 // "gnnGradDx" flag.
 gnn_grad_all (grad, gnnGradDx);

 // We can retrieve the results now by using the macros.

 // First, get the weighted error, which was also computed.
 E = GNN_GRAD_E (grad);

 // Now, get the gradient. First allocate a vector in which to store the
 // result, and then use the macro to access the buffer's internal
 // Dx vector.
 Dx = gsl_vector_alloc (GNN_GRAD_INPUT_SIZE (grad));
 gsl_vector_memcpy (Dx, GNN_GRAD_DX (grad));

 // Do something else here...
 ...

 // Destroy buffer and objects.
 gnn_grad_destroy (grad);
 gnn_node_destroy (node);
 gnn_criterion_destroy (crit);
 gnn_dataset_destroy (data);

 \endcode
 *
 *
 *
 * \section gnn_line_use Using gnn_line.
 *
 * The \ref gnn_line evaluation interface takes an already built \ref gnn_grad
 * buffer plus a direction, and computes the error or the error derivative
 * along the given line.
 *
 * When calling one of the evaluation routines (that is, either by calling
 * \ref gnn_line_pats or \ref gnn_line_all), you have to specify how
 * far the computation should be performed. That's the purpose of the last
 * flag parameter:
 *
 * - gnnLineE  : Compute only the weighted average of the error \f$<E>\f$.
 * - gnnLineDE : Compute \f$<E>\f$ \b and \f$<\frac{d E}{d \alpha}>\f$.
 *
 * The values that haven't been computed shouldn't be accessed, because
 * their values are undefined.
 *
 * Example use:
 \code
 double E;
 double DE;
 gsl_vector *d;

 gnn_grad *grad;
 gnn_line *line;

 // Create the needed objects.
 grad = my_new_custom_grad ();

 // In this example, we want to compute the error derivative along
 // the first parameter. So we have to prepare the corresponding
 // directional vector.
 d = gsl_vector_calloc (GNN_GRAD_PARAMETER_SIZE (grad));
 gsl_vector_set (d, 0, 1.0);

 // Allocate the line evaluation buffer.
 line = gnn_line_new (grad, d);

 // We have to execute the evaluation routine with the "gnnLineDE"
 // flag in order to get the derivative computed.
 gnn_line_all (line, 0.0, gnnLineDE);

 // Get the results by using the macros.
 E  = GNN_LINE_E (line);
 DE = GNN_LINE_DE (line);

 // Of course, the evaluation can be performed at another point, which
 // can be more convenient for the application.
 gnn_line_all (line, 1.0, gnnLineDE);
 ...

 // Do something else here...
 ...

 // Destroy buffer and objects.
 gnn_line_destroy (line);
 gnn_node_destroy (GNN_GRAD_GET_NODE (grad));
 gnn_criterion_destroy (GNN_GRAD_GET_CRITERION (grad));
 gnn_dataset_destroy (GNN_GRAD_GET_DATASET (grad));
 gnn_grad_destroy (grad);

 \endcode
 *
 */

/******************************************/
/*              Include Files             */
/******************************************/

#include "gnn_evaluation.h"



/******************************************/
/*    Public Interface Implementation     */
/******************************************/

/*************************/
/* a) Pattern Evaluation */
/*************************/

/**
 * @brief Builds a new buffer for output evaluation.
 * @ingroup gnn_evaluation_doc
 *
 * This function builds a new \ref gnn_eval buffer structure for computing
 * outputs.
 *
 * @param node A pointer to a \ref gnn_node.
 * @param in   A pointer to a \ref gnn_input input sampler.
 * @param out  A pointer to a \ref gnn_output output device.
 * @return Returns a pointer to a new buffer structure, or NULL on failure.
 */
gnn_eval *
gnn_eval_new (gnn_node *node, gnn_input *in, gnn_output *out)
{
  size_t    n;
  size_t    m;
  size_t    l;
  size_t    P;
  gnn_eval *eval;

  assert (node != NULL);
  assert (in != NULL);
  assert (out != NULL);

  /* get sizes */
  n = gnn_node_input_get_size (node);
  m = gnn_node_output_get_size (node);
  l = gnn_node_param_get_size (node);
  P = gnn_input_get_size (in);

  /* check sizes */
  if (n != gnn_input_sample_get_size (in))
    {
      GSL_ERROR_VAL ("the node's input and the input device's sample size "
                     "should match", GSL_EINVAL, NULL);
    }
  if (gnn_output_is_random_access (out) && m != gnn_output_sample_get_size (out))
    {
      GSL_ERROR_VAL ("the node's and the dataset's output size "
                     "should match", GSL_EINVAL, NULL);
    }
  if (gnn_output_is_random_access (out) && P != gnn_output_get_size (out))
    {
      GSL_ERROR_VAL ("a random access output device should handle the same "
                     "amount of outputs as available inputs",
                     GSL_EINVAL, NULL);
    }

  /* allocate buffer structure */
  eval = (gnn_eval *) malloc (sizeof (*eval));
  if (eval == NULL)
    {
      GSL_ERROR_VAL ("could not allocate memory for gnn_eval",
                     GSL_ENOMEM, NULL);
    }

  /* allocate buffer output vector */
  eval->y = gsl_vector_alloc (m);
  if (eval->y == NULL)
    {
      free (eval);
      GSL_ERROR_VAL ("could not allocate memory for gnn_eval buffers",
                     GSL_ENOMEM, NULL);
    }

  /* set fields */
  eval->n = n;
  eval->m = m;
  eval->l = l;
  eval->P = P;
  eval->node = node;
  eval->input = in;
  eval->output = out;

  return eval;
}

/**
 * @brief Destroys an evaluation buffer.
 * @ingroup gnn_evaluation_doc
 *
 * @param eval A pointer to a \ref gnn_eval buffer.
 */
void
gnn_eval_destroy (gnn_eval *eval)
{
  assert (eval != NULL);

  if (eval->y != NULL)
    gsl_vector_free (eval->y);
  free (eval);
}

/**
 * @brief Computes the outputs for a given input set.
 * @ingroup gnn_evaluation_doc
 *
 * This function computes, given a set of input samples, all associated
 * outputs, storing them into the output device provided by the \ref gnn_eval
 * structure. The patterns that are evaluated are those between \f$k=s\f$
 * and \f$k=s+n-1\f$ (inclusive).
 *
 * @param eval A pointer to a \ref gnn_eval.
 * @param s    The index of the first input sample to be evaluated.
 * @param n    The number of samples to be evaluated.
 * @return Returns 0 if succeeded.
 */
int
gnn_eval_pats (gnn_eval *eval, size_t s, size_t n)
{
  size_t            k;
  const gsl_vector *x;

  assert (eval != NULL);

  /* compute outputs */
  for (k = s; k < s + n; ++k)
    {
      /* get k-th input sample */
      x = gnn_input_get (eval->input, k);

      /* evaluate */
      gnn_node_evaluate_init (eval->node);
      gnn_node_evaluate_f (eval->node, x, eval->y);

      /* store output */
      gnn_output_put (eval->output, k, eval->y);
    }

  return 0;
}

/**
 * @brief Computes the outputs for the whole input set.
 * @ingroup gnn_evaluation_doc
 *
 * This function behaves like \ref gnn_eval_pats, but evaluates all
 * \f$P\f$ input samples, storing the outputs into the output device
 * provided by the \ref gnn_eval structure.
 *
 * @param eval A pointer to a \ref gnn_eval.
 * @return Returns 0 if succeeded.
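 *
 * A minimal usage sketch (the buffer is built as in the
 * \ref gnn_eval_use section); calling this function is equivalent to
 * evaluating the full index range:
 * \code
 * // evaluate every input sample in one call...
 * gnn_eval_all (eval);
 *
 * // ...which simply forwards to gnn_eval_pats over all P samples:
 * gnn_eval_pats (eval, 0, eval->P);
 * \endcode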
 */
int
gnn_eval_all (gnn_eval *eval)
{
  return gnn_eval_pats (eval, 0, eval->P);
}



/************************************/
/* b) Error and Gradient Evaluation */
/************************************/

/**
 * @brief Builds a new buffer for gradient evaluation.
 * @ingroup gnn_evaluation_doc
 *
 * This function builds a new \ref gnn_grad buffer structure for computing
 * gradients.
 *
 * @param node A pointer to a \ref gnn_node.
 * @param crit A pointer to a \ref gnn_criterion.
 * @param data A pointer to a \ref gnn_dataset.
 * @return Returns a pointer to a new buffer structure, or NULL on failure.
 */
gnn_grad *
gnn_grad_new (gnn_node *node, gnn_criterion *crit, gnn_dataset *data)
{
  size_t    n;
  size_t    m;
  size_t    l;
  size_t    P;
  gnn_grad *grad;

  assert (node != NULL);

  /* get sizes */
  n = gnn_node_input_get_size (node);
  m = gnn_node_output_get_size (node);
  l = gnn_node_param_get_size (node);
  P = gnn_dataset_get_size (data);

  /* check sizes */
  if (n != gnn_dataset_input_get_size (data))
    {
      GSL_ERROR_VAL ("gnn_node's and gnn_dataset's input sizes should match",
                     GSL_EINVAL, NULL);
    }

  if (m != gnn_dataset_output_get_size (data))
    {
      GSL_ERROR_VAL ("gnn_node's and gnn_dataset's output sizes should match",
                     GSL_EINVAL, NULL);
    }

  if ((crit != NULL) && (m != gnn_criterion_get_size (crit)))
    {
      GSL_ERROR_VAL ("gnn_node's output size and gnn_criterion's size "
                     "should match", GSL_EINVAL, NULL);
    }

  /* allocate structure */
  grad = (gnn_grad *) malloc (sizeof (*grad));
  if (grad == NULL)
    {
      GSL_ERROR_VAL ("could not allocate memory for gnn_grad",
                     GSL_ENOMEM, NULL);
    }

  /* build buffers */
  grad->y   = gsl_vector_alloc (m);
  grad->w   = gsl_vector_alloc (l);
  grad->dy  = gsl_vector_alloc (m);
  grad->dx  = gsl_vector_alloc (n);
  grad->mdx = gsl_vector_alloc (n);
  grad->dw  = gsl_vector_alloc (l);
  grad->mdw = gsl_vector_alloc (l);
  grad->h   = gsl_vector_alloc (l);
  if (   (grad->y   == NULL)
      || (grad->w   == NULL)
      || (grad->dy  == NULL)
      || (grad->dx  == NULL)
      || (grad->mdx == NULL)
      || (grad->dw  == NULL)
      || (grad->mdw == NULL)
      || (grad->h   == NULL))
    {
      gnn_grad_destroy (grad);
      GSL_ERROR_VAL ("could not allocate memory for gnn_grad's buffers",
                     GSL_ENOMEM, NULL);
    }

  /* set pointers and values */
  grad->node = node;
  grad->crit = crit;
  grad->data = data;
  grad->n = n;
  grad->m = m;
  grad->l = l;
  grad->P = P;

  return grad;
}

/**
 * @brief Destroys a gradient buffer.
 * @ingroup gnn_evaluation_doc
 *
 * @param grad A pointer to a \ref gnn_grad buffer.
 */
void
gnn_grad_destroy (gnn_grad *grad)
{
  assert (grad != NULL);

  if (grad->y   != NULL) gsl_vector_free (grad->y);
  if (grad->w   != NULL) gsl_vector_free (grad->w);
  if (grad->dy  != NULL) gsl_vector_free (grad->dy);
  if (grad->dx  != NULL) gsl_vector_free (grad->dx);
  if (grad->mdx != NULL) gsl_vector_free (grad->mdx);
  if (grad->dw  != NULL) gsl_vector_free (grad->dw);
  if (grad->mdw != NULL) gsl_vector_free (grad->mdw);
  if (grad->h   != NULL) gsl_vector_free (grad->h);
  free (grad);
}


/**
 * @brief Compute the mean cost and gradients.
 * @ingroup gnn_evaluation_doc
 *
 * This function computes several things simultaneously in one pass.
It builds
 * estimates of the gradients \f$\frac{\partial E}{\partial x}\f$
 * and \f$\frac{\partial E}{\partial w}\f$ and the cost \f$E\f$ by
 * averaging over the n patterns starting at s. That is, this function
 * computes:
 *
 * \f[ <\frac{\partial E}{\partial x}>
 *     = \frac{1}{\sum_{k=s}^{s+n-1} p_k}
 *       \sum_{k=s}^{s+n-1} p_k \frac{\partial E}{\partial x}(x^k, w),
 * \f]
 * \f[ <\frac{\partial E}{\partial w}>
 *     = \frac{1}{\sum_{k=s}^{s+n-1} p_k}
 *       \sum_{k=s}^{s+n-1} p_k \frac{\partial E}{\partial w}(x^k, w)
 * \f]
 * and
 * \f[ <E>
 *     = \frac{1}{\sum_{k=s}^{s+n-1} p_k}
 *       \sum_{k=s}^{s+n-1} p_k E(f(x^k, w), t^k)
 * \f]
 * where \f$p_k\f$ is the \f$k\f$-th pattern's weight,
 * \f$ \frac{\partial E}{\partial x}(x^k, w) \f$ is the gradient with
 * respect to the inputs evaluated at the \f$k\f$-th pattern,
 * \f$ \frac{\partial E}{\partial w}(x^k, w) \f$ is the gradient with
 * respect to the parameters evaluated at the \f$k\f$-th pattern, and
 * \f$ E(f(x^k, w), t^k) \f$ is the cost/error for the \f$k\f$-th output.
 *
 * In order to obtain the results, you should tell the function how far
 * it should process the patterns, using a special flag. The evaluation flag
 * can be:
 * - gnnGradE  : only compute the mean error \f$<E>\f$.
 * - gnnGradDx : compute \f$<E>\f$ and \f$<\frac{\partial E}{\partial x}>\f$.
 * - gnnGradDw : compute \f$<E>\f$, \f$<\frac{\partial E}{\partial x}>\f$
 *   and \f$<\frac{\partial E}{\partial w}>\f$.
 *
 * The computed values can be retrieved directly from the \ref gnn_grad buffer:
 * \code
 * // compute all
 * gnn_grad_pats (g, gnnGradDw, 10, 20);
 *
 * // now, get them
 * e  = GNN_GRAD_E (g);     // e is a double
 * dx = GNN_GRAD_DX (g);    // dx is a gsl_vector *
 * dw = GNN_GRAD_DW (g);    // dw is a gsl_vector *
 *
 * sp = GNN_GRAD_SUMP (g);  // sp is a double
 * se = GNN_GRAD_SUME (g);  // se is a double
 * \endcode
 *
 * The starting index s should be within the valid bounds. The size n of the
 * batch will be clipped if s + n exceeds the valid bounds. For example, if
 * there are 10 patterns (so the index of the last pattern is 9), and s=8
 * and n=4, then the last pattern in the averaging will be 9, although
 * 8+4=12.
 *
 * @param grad A pointer to a \ref gnn_grad.
 * @param flag The evaluation flag.
 * @param s    The index of the first pattern in the dataset to be considered
 *             in the averaging.
 * @param n    The number of patterns to be considered.
 * @return Returns 0 if succeeded.
 */
int
gnn_grad_pats (gnn_grad *grad, gnn_grad_eval flag, size_t s, size_t n)
{
  size_t      k;
  double      p;
  gsl_vector *x;
  gsl_vector *t;

  assert (grad != NULL);

  /* check batch bounds (s is unsigned, so only the upper bound matters) */
  if (s >= grad->P)
    {
      GSL_ERROR ("invalid starting index", GSL_EINVAL);
    }
  if (n < 1)
    {
      GSL_ERROR ("the number of patterns in the batch "
                 "should be strictly positive",
                 GSL_EINVAL);
    }

  /* adjust batch size */
  n = (n + s <= grad->P) ? n : grad->P - s;

  /* clear gradients */
  if (flag >= gnnGradDx) gsl_vector_set_zero (grad->mdx);
  if (flag >= gnnGradDw) gsl_vector_set_zero (grad->mdw);

  /* compute gradient */
  grad->mp = 0.0;
  grad->me = 0.0;
  for (k = s; k < s + n; ++k)
    {
      /* get pattern */
      gnn_dataset_get (grad->data, k, &x, &t, &p);

      /* forward pass and error */
      gnn_node_evaluate_init (grad->node);
      gnn_node_evaluate_f (grad->node, x, grad->y);
      grad->e = gnn_criterion_evaluate_e (grad->crit, grad->y, t);

      /* backward pass: only as far as the flag requires */
      if (flag >= gnnGradDx)
        {
          gnn_criterion_evaluate_dy (grad->crit, grad->dy);
          gnn_node_evaluate_dx (grad->node, grad->dy, grad->dx);
        }
      if (flag >= gnnGradDw)
        gnn_node_evaluate_dw (grad->node, grad->dw);

      /* scale and sum gradients */
      if (flag >= gnnGradDx)
        {
          gsl_vector_scale (grad->dx, p);
          gsl_vector_add (grad->mdx, grad->dx);
        }
      if (flag >= gnnGradDw)
        {
          gsl_vector_scale (grad->dw, p);
          gsl_vector_add (grad->mdw, grad->dw);
        }

      /* keep track of the weight and cost sum */
      grad->mp += p;
      grad->me += p * grad->e;
    }

  /* scale gradients and cost */
  if (flag >= gnnGradDx)
    gsl_vector_scale (grad->mdx, 1 / grad->mp);

  if (flag >= gnnGradDw)
    gsl_vector_scale (grad->mdw, 1 / grad->mp);

  grad->me = grad->me / grad->mp;

  return 0;
}

/**
 * @brief Compute the mean cost and gradients.
 * @ingroup gnn_evaluation_doc
 *
 * This function behaves like the \ref gnn_grad_pats function, but
 * it computes the gradients and the error over the whole dataset.
That is,
 * it computes:
 *
 * \f[ <\frac{\partial E}{\partial x}>
 *     = \frac{1}{\sum_{k=0}^{P-1} p_k}
 *       \sum_{k=0}^{P-1} p_k \frac{\partial E}{\partial x}(x^k, w),
 * \f]
 * \f[ <\frac{\partial E}{\partial w}>
 *     = \frac{1}{\sum_{k=0}^{P-1} p_k}
 *       \sum_{k=0}^{P-1} p_k \frac{\partial E}{\partial w}(x^k, w)
 * \f]
 * and
 * \f[ <E>
 *     = \frac{1}{\sum_{k=0}^{P-1} p_k}
 *       \sum_{k=0}^{P-1} p_k E(f(x^k, w), t^k)
 * \f]
 * where \f$p_k\f$ is the \f$k\f$-th pattern's weight,
 * \f$ \frac{\partial E}{\partial x}(x^k, w) \f$ is the gradient with
 * respect to the inputs evaluated at the \f$k\f$-th pattern,
 * \f$ \frac{\partial E}{\partial w}(x^k, w) \f$ is the gradient with
 * respect to the parameters evaluated at the \f$k\f$-th pattern, and
 * \f$ E(f(x^k, w), t^k) \f$ is the cost/error for the \f$k\f$-th output.
 *
 * Recall that \f$P\f$ is the number of patterns contained in the dataset.
 *
 * The evaluation flag can be:
 * - gnnGradE  : only compute the mean error \f$<E>\f$.
 * - gnnGradDx : compute \f$<E>\f$ and \f$<\frac{\partial E}{\partial x}>\f$.
 * - gnnGradDw : compute \f$<E>\f$, \f$<\frac{\partial E}{\partial x}>\f$
 *   and \f$<\frac{\partial E}{\partial w}>\f$.
 *
 * The computed values can be retrieved directly from the \ref gnn_grad buffer:
 * \code
 * // compute all
 * gnn_grad_all (g, gnnGradDw);
 *
 * // now, get them
 * e  = GNN_GRAD_E (g);     // e is a double
 * dx = GNN_GRAD_DX (g);    // dx is a gsl_vector *
 * dw = GNN_GRAD_DW (g);    // dw is a gsl_vector *
 *
 * sp = GNN_GRAD_SUMP (g);  // sp is a double
 * se = GNN_GRAD_SUME (g);  // se is a double
 * \endcode
 *
 * @param grad A pointer to a \ref gnn_grad.
 * @param flag The evaluation flag.
 * @return Returns 0 if succeeded.
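 *
 * For minibatch-style processing, the same buffer can also be swept in
 * batches with \ref gnn_grad_pats (a sketch; the batch size of 10 is
 * arbitrary, and the pattern count is read from the buffer's P field):
 * \code
 * size_t s;
 * size_t b = 10;   // minibatch size
 *
 * for (s = 0; s < grad->P; s += b)
 *   {
 *     // the batch size is clipped automatically at the dataset's end
 *     gnn_grad_pats (grad, gnnGradDw, s, b);
 *
 *     // ... use GNN_GRAD_E (grad) and GNN_GRAD_DW (grad) here ...
 *   }
 * \endcode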
 */
int
gnn_grad_all (gnn_grad *grad, gnn_grad_eval flag)
{
  assert (grad != NULL);

  return gnn_grad_pats (grad, flag, 0, grad->P);
}



/****************************/
/* c) Error Line Evaluation */
/****************************/

/**
 * @brief Returns a new buffer structure for line evaluations.
 * @ingroup gnn_evaluation_doc
 *
 * This function builds a new buffer structure for line evaluations. The
 * direction is given by the \a direction vector, and the origin is set at
 * the \ref gnn_node's current parameter vector. If the direction is omitted
 * (NULL is given), then it will be a null vector.
 *
 * @param grad      A pointer to a \ref gnn_grad.
 * @param direction A pointer to a gsl_vector.
 * @return Returns a pointer to the new \ref gnn_line buffer, or NULL if
 *         failed.
 */
gnn_line *
gnn_line_new (gnn_grad *grad, gsl_vector *direction)
{
  gnn_line *line;

  assert (grad != NULL);

  /* check sizes */
  if (direction != NULL && grad->l != direction->size)
    {
      GSL_ERROR_VAL ("the directional vector's size doesn't match",
                     GSL_EINVAL, NULL);
    }

  /* allocate line buffer */
  line = (gnn_line *) malloc (sizeof (*line));
  if (line == NULL)
    {
      GSL_ERROR_VAL ("couldn't allocate memory for gnn_line",
                     GSL_ENOMEM, NULL);
    }

  /* allocate internal buffers and vectors */
  line->w    = gsl_vector_alloc (grad->l);
  line->d    = gsl_vector_alloc (grad->l);
  line->dbuf = gsl_vector_alloc (grad->l);
  if (line->w == NULL || line->d == NULL || line->dbuf == NULL)
    {
      gnn_line_destroy (line);
      GSL_ERROR_VAL ("couldn't allocate memory for gnn_line's buffers",
                     GSL_ENOMEM, NULL);
    }

  /* set remaining fields */
  line->grad = grad;
  line->derivative = 0.0;
  /* set origin */
  gnn_node_param_get (grad->node, line->w);

  /* set directional vector */
  if (direction != NULL)
    gsl_vector_memcpy (line->d, direction);
  else
    gsl_vector_set_zero (line->d);

  return line;
}

/**
 * @brief Destroys a given line evaluation buffer.
 * @ingroup gnn_evaluation_doc
 *
 * This function frees the \ref gnn_line's internal buffers and deallocates
 * the structure. The installed \ref gnn_grad error and gradient evaluation
 * buffer won't be destroyed.
 *
 * @param line A pointer to a \ref gnn_line.
 */
void
gnn_line_destroy (gnn_line *line)
{
  assert (line != NULL);

  if (line->w != NULL)
    gsl_vector_free (line->w);
  if (line->d != NULL)
    gsl_vector_free (line->d);
  if (line->dbuf != NULL)
    gsl_vector_free (line->dbuf);
  free (line);
}

/**
 * @brief Sets a new direction \f$d\f$ for the \ref gnn_line structure.
 * @ingroup gnn_evaluation_doc
 *
 * This function sets a new directional vector \f$d\f$ for the line evaluation
 * buffer. The vector should be of the correct size.
 *
 * @param line A pointer to a \ref gnn_line.
 * @param dir  A pointer to a gsl_vector.
 * @return Returns 0 if succeeded.
 */
int
gnn_line_set_direction (gnn_line *line, const gsl_vector *dir)
{
  assert (line != NULL);
  assert (dir != NULL);

  /* check size */
  if (line->grad->l != dir->size)
    {
      GSL_ERROR ("the size of the directional vector is incorrect",
                 GSL_EINVAL);
    }

  /* copy vector */
  gsl_vector_memcpy (line->d, dir);

  return 0;
}

/**
 * @brief Gets the installed directional vector \f$d\f$.
 * @ingroup gnn_evaluation_doc
 *
 * This function returns a pointer to the directional vector \f$d\f$
 * currently used by the \ref gnn_line buffer for its line evaluations.
 *
 * @param line A pointer to a \ref gnn_line.
 * @return Returns a pointer to the directional vector.
 */
const gsl_vector *
gnn_line_get_direction (gnn_line *line)
{
  assert (line != NULL);
  return line->d;
}

/**
 * @brief Sets a new origin \f$w_0\f$ for the \ref gnn_line structure.
 * @ingroup gnn_evaluation_doc
 *
 * This function sets a new origin vector \f$w_0\f$ for the line evaluation
 * buffer. The vector should be of the correct size.
 *
 * @param line   A pointer to a \ref gnn_line.
 * @param origin A pointer to a gsl_vector.
 * @return Returns 0 if succeeded.
 */
int
gnn_line_set_origin (gnn_line *line, const gsl_vector *origin)
{
  assert (line != NULL);
  assert (origin != NULL);

  /* check size */
  if (line->grad->l != origin->size)
    {
      GSL_ERROR ("the size of the origin vector is incorrect",
                 GSL_EINVAL);
    }

  /* copy vector */
  gsl_vector_memcpy (line->w, origin);

  return 0;
}

/**
 * @brief Gets the installed origin vector \f$w_0\f$.
 * @ingroup gnn_evaluation_doc
 *
 * This function returns a pointer to the origin vector \f$w_0\f$
 * currently used by the \ref gnn_line for its line evaluations.
 *
 * @param line A pointer to a \ref gnn_line.
 * @return Returns a pointer to the origin vector.
 */
const gsl_vector *
gnn_line_get_origin (gnn_line *line)
{
  assert (line != NULL);
  return line->w;
}

/**
 * @brief Gets the installed \ref gnn_grad evaluation buffer.
 * @ingroup gnn_evaluation_doc
 *
 * This function returns a pointer to the installed \ref gnn_grad error and
 * gradient evaluation buffer used by the current \ref gnn_line structure
 * for its line evaluations.
 *
 * @param line A pointer to a \ref gnn_line.
 * @return Returns a pointer to the internal \ref gnn_grad structure.
 */
gnn_grad *
gnn_line_get_grad (gnn_line *line)
{
  assert (line != NULL);
  return line->grad;
}

/**
 * @brief Compute mean cost and gradients along a direction for a minibatch.
 * @ingroup gnn_evaluation_doc
 *
 * This function behaves like the \ref gnn_grad_pats function, but
 * it computes the error and its derivative along a given line. That is,
 * it performs all evaluations at
 *
 * \f[ \overline{w} = w_0 + \alpha d \f]
 *
 * where \f$w_0\f$ is the installed origin, \f$\alpha\f$ is a scalar
 * and \f$d\f$ is a given direction, treating the function as if it were
 * just a one-dimensional function. The patterns from \a s to \a s+n-1
 * are used for the averaging.
 *
 * The flag \a flag tells the function if it should compute only the error
 * (\c gnnLineE) or the error with its derivative along the line (\c gnnLineDE).
 * The computed derivative is appropriately scaled in order to match
 * the derivative with respect to the arc length along the line:
 * \f[ \frac{1}{\|d\|}\frac{\partial E}{\partial \alpha}
 *     = \frac{1}{\|d\|}\frac{\partial E}{\partial w}
 *       \cdot \frac{\partial w}{\partial \alpha}
 *     = \frac{1}{\|d\|}\frac{\partial E}{\partial w} \cdot d
 * \f]
 *
 * The results can be recovered by the macros \ref GNN_LINE_E and
 * \ref GNN_LINE_DE:
 *
 * \code
 * gnn_grad *grad;
 * gnn_line *line;
 * double    error;
 * double    derivative;
 *
 * // build grad and line buffers
 * ...
 *
 * // evaluate the patterns 11-30 at 0.5
 * gnn_line_pats (line, 0.5, gnnLineDE, 11, 30);
 *
 * // get results
 * error = GNN_LINE_E (line);
 * derivative = GNN_LINE_DE (line);
 * \endcode
 *
 * \warning A line evaluation modifies the \ref gnn_node's internal
 * parameters. In order to recover the original vector, call this function
 * with \f$\alpha = 0\f$.
 *
 * @param line A pointer to a \ref gnn_line.
 * @param alpha The scalar \f$\alpha\f$.
 * @param flag The evaluation flag.
 * @param s The first pattern index \f$s\f$.
 * @param n The size of the minibatch \f$n\f$.
 * @return Returns 0 if succeeded.
 */
int
gnn_line_pats (gnn_line *line,
               double alpha, gnn_line_eval flag, size_t s, size_t n)
{
  assert (line != NULL);

  /* compute the new parameter vector (using the correct directional vector) */
  gsl_vector_memcpy (line->dbuf, line->d);
  gsl_vector_scale (line->dbuf, alpha);
  gsl_vector_add (line->dbuf, line->w);

  /* set the new parameter vector */
  gnn_node_param_set (line->grad->node, line->dbuf);

  /* evaluate */
  if (flag == gnnLineE)
    {
      /* compute error */
      gnn_grad_pats (line->grad, gnnGradE, s, n);
      line->error = GNN_GRAD_E (line->grad);
    }
  else if (flag == gnnLineDE)
    {
      double dE;
      double nrmd;

      /* compute error and gradient */
      gnn_grad_pats (line->grad, gnnGradDw, s, n);
      line->error = GNN_GRAD_E (line->grad);

      /* compute derivative along the normalized direction */
      gsl_blas_ddot (line->d, GNN_GRAD_DW (line->grad), &dE);
      nrmd = gsl_blas_dnrm2 (line->d);
      line->derivative = dE / nrmd;
    }
  else
    GSL_ERROR ("undefined flag for line operation", GSL_EINVAL);

  return 0;
}

/**
 * @brief Compute mean cost and gradients along a direction for all patterns.
 * @ingroup gnn_evaluation_doc
 *
 * This function behaves like the \ref gnn_grad_pats function, but
 * it computes the error and its derivative along a given line. That is,
 * it performs all evaluations at
 *
 * \f[ \overline{w} = w_0 + \alpha d \f]
 *
 * where \f$w_0\f$ is the origin vector, \f$\alpha\f$ is a scalar
 * and \f$d\f$ is a given direction, treating the function as if it were
 * just a one-dimensional function. All patterns are used for this
 * evaluation.
 *
 * The flag \a flag tells the function whether it should compute only the
 * error (\c gnnLineE) or the error together with its derivative along the
 * line (\c gnnLineDE). The derivative is appropriately scaled so that it
 * matches the derivative along the normalized direction:
 * \f[ \frac{1}{||d||}\frac{d E}{d \alpha}
 *   = \frac{1}{||d||}\frac{\partial E}{\partial w}
 *     \cdot \frac{\partial w}{\partial \alpha}
 *   = \frac{1}{||d||}\frac{\partial E}{\partial w} \cdot d
 * \f]
 *
 * The results can be recovered by the macros \ref GNN_LINE_E and
 * \ref GNN_LINE_DE:
 *
 * \code
 * gnn_grad *grad;
 * gnn_line *line;
 * double error;
 *
 * // build grad and line buffers
 * ...
 *
 * // evaluate all patterns at 1.2
 * gnn_line_all (line, 1.2, gnnLineE);
 *
 * // get results
 * error = GNN_LINE_E (line);
 * \endcode
 *
 * \warning A line evaluation modifies the \ref gnn_node's internal
 * parameters. In order to recover the original vector, call this function
 * with \f$\alpha = 0\f$.
 *
 * @param line A pointer to a \ref gnn_line.
 * @param alpha The scalar \f$\alpha\f$.
 * @param flag The evaluation flag.
 * @return Returns 0 if succeeded.
 */
int
gnn_line_all (gnn_line *line, double alpha, gnn_line_eval flag)
{
  return gnn_line_pats (line, alpha, flag, 0, line->grad->P);
}