/***************************************************************************
 *  @file gnn_tanh.c
 *  @brief Hyperbolic Tangent transfer function.
 *
 *  @date   : 06-08-03 18:17, 21-08-03 20:00
 *  @author : Pedro Ortega C. <peortega@dcc.uchile.cl>
 *  Copyright  2003  Pedro Ortega C.
 ****************************************************************************/
/*
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Library General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/**
 * @defgroup gnn_tanh_doc gnn_tanh : Hyperbolic Tangent Activation Function.
 * @ingroup gnn_atomic_doc
 *
 * This node type implements the Hyperbolic Tangent activation function given
 * by:
 * \f[ y_i = a \tanh (b x_i) \f]
 * where the index \f$i = 1,\ldots,n\f$ runs over all inputs / outputs.
 *
 * This function is a member of the more general class of sigmoid functions,
 * which have the property of "squashing" their inputs into a very restricted
 * output interval.
 *
 * The Hyperbolic Tangent is symmetric and its outputs lie in the interval
 * \f$[-a,a]\f$.
 * It has been said that it leads to faster convergence in training than the
 * Logistic Sigmoid.
00041 */ 00042 00043 00044 /******************************************/ 00045 /* Include Files */ 00046 /******************************************/ 00047 00048 #include "gnn_tanh.h" 00049 #include <math.h> 00050 00051 /******************************************/ 00052 /* Macros and Definitions */ 00053 /******************************************/ 00054 00055 #define GNN_TANH_A(node) (((gnn_tanh *)(node))->a) 00056 #define GNN_TANH_B(node) (((gnn_tanh *)(node))->b) 00057 00058 /******************************************/ 00059 /* Static Declaration */ 00060 /******************************************/ 00061 00062 typedef struct _gnn_tanh 00063 { 00064 gnn_node node; 00065 double a; 00066 double b; 00067 } gnn_tanh; 00068 00069 static int 00070 gnn_tanh_f (gnn_node *node, 00071 const gsl_vector *x, 00072 const gsl_vector *w, 00073 gsl_vector *y); 00074 00075 static int 00076 gnn_tanh_dx (gnn_node *node, 00077 const gsl_vector *x, 00078 const gsl_vector *w, 00079 const gsl_vector *dy, 00080 gsl_vector *dx); 00081 00082 00083 00084 /******************************************/ 00085 /* Static Implementation */ 00086 /******************************************/ 00087 00088 /** 00089 * @brief Computes the output. 00090 * @ingroup gnn_tanh_doc 00091 * 00092 * This functions evaluates the hyperbolic tangent function. 00093 * 00094 * @param node A pointer to the \ref gnn_tanh node. 00095 * @param x The input vector \f$x\f$. 00096 * @param w The parameter vector \f$w\f$ (which is empty in this case). 00097 * @param y The output buffer vector \f$y\f$. 00098 * @return Returns 0 on success. 
00099 */ 00100 static int 00101 gnn_tanh_f (gnn_node *node, 00102 const gsl_vector *x, 00103 const gsl_vector *w, 00104 gsl_vector *y) 00105 { 00106 size_t i; 00107 size_t size; 00108 double a; 00109 double b; 00110 00111 /* get the size */ 00112 size = gnn_node_input_get_size (node); 00113 00114 /* get the parameters */ 00115 a = GNN_TANH_A (node); 00116 b = GNN_TANH_B (node); 00117 00118 /* evaluate */ 00119 for (i=0; i<size; ++i) 00120 { 00121 double xi; 00122 double yi; 00123 00124 xi = gsl_vector_get (x, i); 00125 yi = a * tanh (b * xi); 00126 gsl_vector_set (y, i, yi); 00127 } 00128 00129 return 0; 00130 } 00131 00132 /** 00133 * @brief Computes \f$ \frac{\partial E}{\partial X} \f$. 00134 * @ingroup gnn_tanh_doc 00135 * 00136 * This functions computes the gradient of the hyperbolic tangent function, 00137 * given dedy (\f$ \frac{\partial E}{\partial Y} \f$). The function is, 00138 * \f[ \frac{\partial E}{\partial x_i} = 00139 * ab \, sech^2 (bx_i) \frac{\partial E}{\partial y_i} = 00140 * ab \left ( 1 - \tanh^2(bx_i) \right ) \frac{\partial E}{\partial y_i} \f] 00141 * 00142 * @param node A pointer to the \ref gnn_tanh node. 00143 * @param x The input vector \f$x\f$. 00144 * @param w The parameter vector \f$w\f$ (which is empty in this case). 00145 * @param dy The vector \f$\frac{\partial E}{\partial y}\f$. 00146 * @param dx The output buffer vector \f$\frac{\partial E}{\partial x}\f$. 00147 * @return Returns 0 on success. 
00148 */ 00149 static int 00150 gnn_tanh_dx (gnn_node *node, 00151 const gsl_vector *x, 00152 const gsl_vector *w, 00153 const gsl_vector *dy, 00154 gsl_vector *dx) 00155 { 00156 size_t i; 00157 size_t size; 00158 double a; 00159 double b; 00160 00161 /* get the size */ 00162 size = gnn_node_input_get_size (node); 00163 00164 /* get the parameters */ 00165 a = GNN_TANH_A (node); 00166 b = GNN_TANH_B (node); 00167 00168 /* evaluate */ 00169 for (i=0; i<size; ++i) 00170 { 00171 double xi; 00172 double yi; 00173 double dxi; 00174 double dyi; 00175 00176 xi = gsl_vector_get (x, i); 00177 yi = a * tanh (b * xi); 00178 dyi = gsl_vector_get (dy, i); 00179 dxi = a * b * (1.0 - yi * yi) * dyi; 00180 00181 gsl_vector_set (dx, i, dxi); 00182 } 00183 00184 return 0; 00185 } 00186 00187 00188 /******************************************/ 00189 /* Public Interface */ 00190 /******************************************/ 00191 00192 /** 00193 * @brief Creates a Hyperbolic Tangent Activation Function node. 00194 * @ingroup gnn_tanh_doc 00195 * 00196 * This function creates a node of the gnn_tanh type. This node computes 00197 * \f[ y_i = a \tanh (b x_i) \f] 00198 * where \f$i = 1, \ldots, n\f$, \f$a\f$ and \f$b\f$ constant parameters. 00199 * 00200 * @param input_size The input size \f$n\f$. 00201 * @param a The \f$a\f$ factor. 00202 * @param b The \f$b\f$ factor. 00203 * @return A pointer to a new \ref gnn_tanh node. 
00204 */ 00205 gnn_node * 00206 gnn_tanh_new (int input_size, double a, double b) 00207 { 00208 int status; 00209 gnn_node *node; 00210 00211 /* check if sizes are positive */ 00212 if (input_size < 1) 00213 { 00214 GSL_ERROR_VAL ("input size should be strictly positive", 00215 GSL_EINVAL, NULL); 00216 } 00217 00218 /* allocate the node */ 00219 node = (gnn_node *) malloc (sizeof (gnn_tanh)); 00220 if (node == NULL) 00221 { 00222 GSL_ERROR_VAL ("could not allocate memory for gnn_tanh node", 00223 GSL_ENOMEM, NULL); 00224 } 00225 00226 /* Initialize the node */ 00227 status = gnn_node_init (node, 00228 "gnn_tanh", 00229 gnn_tanh_f, 00230 gnn_tanh_dx, 00231 NULL, 00232 NULL); 00233 if (status) 00234 { 00235 GSL_ERROR_VAL ("could not initialize gnn_tanh node", GSL_EFAILED, NULL); 00236 } 00237 00238 status = gnn_node_set_sizes (node, input_size, input_size, 0); 00239 if (status) 00240 { 00241 GSL_ERROR_VAL ("could not set sizes for gnn_tanh node", 00242 GSL_EFAILED, NULL); 00243 } 00244 00245 /* set the constants */ 00246 GNN_TANH_A (node) = a; 00247 GNN_TANH_B (node) = b; 00248 00249 return node; 00250 } 00251 00252 /** 00253 * @brief Creates a standard Hyperbolic Tangent function node. 00254 * @ingroup gnn_tanh_doc 00255 * 00256 * This function creates a layer of the gnn_tanh type: 00257 * \f[ y_i = \tanh (x_i) \f] 00258 * where \f$i = 1, \ldots, n\f$. 00259 * 00260 * @param input_size The input size \f$n\f$. 00261 * @return A pointer to a new \ref gnn_tanh node. 00262 */ 00263 gnn_node * 00264 gnn_tanh_standard_new (int input_size) 00265 { 00266 return gnn_tanh_new (input_size, 1.0, 1.0); 00267 } 00268 00269 /** 00270 * @brief Creates a Hyperbolic Tangent function node. 00271 * @ingroup gnn_tanh_doc 00272 * 00273 * This function creates a node of the gnn_tanh type. 00274 * \f[ y_i = 1.7159 \tanh (\frac{2}{3} x_i) \f] 00275 * where \f$i = 1, \ldots, n\f$. 
The constants are chosen so that 00276 * when the layer is feed with a linear combination layer with 00277 * normalized outputs its own outputs have an effective gain of 1. 00278 * 00279 * @param input_size The input size \f$n\f$. 00280 * @return A pointer to a new \ref gnn_tanh node. 00281 */ 00282 gnn_node * 00283 gnn_tanh_enhanced_new (int input_size) 00284 { 00285 return gnn_tanh_new (input_size, 1.7159, 2.0 / 3.0); 00286 } 00287 00288 /** 00289 * @brief Returns the amplitude constant \f$a\f$. 00290 * @ingroup gnn_tanh_doc 00291 * 00292 * @param node A pointer to the \ref gnn_tanh node. 00293 * @return The amplitude \f$a\f$ of the sigmoid. 00294 */ 00295 double 00296 gnn_tanh_get_a (gnn_node *node) 00297 { 00298 return GNN_TANH_A (node); 00299 } 00300 00301 /** 00302 * @brief Sets the amplitude constant \f$a\f$. 00303 * @ingroup gnn_tanh_doc 00304 * 00305 * @param node A pointer to the \ref gnn_tanh node. 00306 * @param a The amplitude factor. 00307 */ 00308 void 00309 gnn_tanh_set_a (gnn_node *node, double a) 00310 { 00311 GNN_TANH_A (node) = a; 00312 } 00313 00314 /** 00315 * @brief Gets the streching factor \f$b\f$. 00316 * @ingroup gnn_tanh_doc 00317 * 00318 * @param node A pointer to the \ref gnn_tanh node. 00319 * @return The streching factor \f$b\f$. 00320 */ 00321 double 00322 gnn_tanh_get_b (gnn_node *node) 00323 { 00324 return GNN_TANH_B (node); 00325 } 00326 00327 /** 00328 * @brief Sets the streching factor \f$b\f$. 00329 * @ingroup gnn_tanh_doc 00330 * 00331 * @param node A pointer to the \ref gnn_tanh node. 00332 * @param b The new streching factor \f$b\f$. 00333 */ 00334 void 00335 gnn_tanh_set_b (gnn_node *node, double b) 00336 { 00337 GNN_TANH_B (node) = b; 00338 } 00339 00340 00341 00342
/* 1.2.18 */