00001 /*************************************************************************** 00002 * @file gnn_random_order.c 00003 * @brief Random order for datasets implementation. 00004 * 00005 * @date : 30-05-04 18:14 00006 * @author : Pedro Ortega C. <peortega@dcc.uchile.cl> 00007 * Copyright 2003 Pedro Ortega C. 00008 ****************************************************************************/ 00009 /* 00010 * This program is free software; you can redistribute it and/or modify 00011 * it under the terms of the GNU General Public License as published by 00012 * the Free Software Foundation; either version 2 of the License, or 00013 * (at your option) any later version. 00014 * 00015 * This program is distributed in the hope that it will be useful, 00016 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 * GNU Library General Public License for more details. 00019 * 00020 * You should have received a copy of the GNU General Public License 00021 * along with this program; if not, write to the Free Software 00022 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00023 */ 00024 00025 00026 00027 /** 00028 * @defgroup gnn_random_order_doc gnn_random_order : A random order sampler. 00029 * @ingroup gnn_dataset_doc 00030 * @brief Random order sampler for datasets implementation. 00031 * 00032 * The \ref gnn_random_order is a \ref gnn_dataset that acts as an intermediary 00033 * between an opaque dataset and a sampling client, like a \ref gnn_trainer. 00034 * 00035 * On every call on \ref gnn_dataset_reset, the \ref gnn_random_order sampler 00036 * generates a new permutation (a new sampling order) for the underlying 00037 * \ref gnn_dataset. That is, the \ref gnn_random_order's ith-pattern 00038 * \f$(x^i, t^i, p^i)\f$, corresponds to the pattern 00039 * \f$(x^{P(i)}, t^{P(i)}, p^{P(i)})\f$ of the underlying \ref gnn_dataset 00040 * structure, where \f$P(\cdot): \{0,\ldots,P\} \rightarrow \{0,\ldots,P\}\f$ 00041 * is the random permutation. 00042 * 00043 * When building a \ref gnn_random_order upon a given \ref gnn_dataset, the 00044 * \ref gnn_random_order structure will <b>own</b> the source-dataset. That 00045 * means that if you destroy the \ref gnn_random_order structure, the 00046 * underlying \ref gnn_dataset will be destroyed too. 00047 */ 00048 00049 00050 00051 /******************************************/ 00052 /* Include Files */ 00053 /******************************************/ 00054 00055 #include "gnn_random_order.h" 00056 #include "gnn_simple_set.h" 00057 00058 00059 /******************************************/ 00060 /* Static Declaration */ 00061 /******************************************/ 00062 00063 static int 00064 gnn_random_order_reset (gnn_dataset *set); 00065 00066 static int 00067 gnn_random_order_get (gnn_dataset *set, 00068 size_t k, 00069 gsl_vector **x, 00070 gsl_vector **t, 00071 double *p); 00072 00073 static void 00074 gnn_random_order_destroy (gnn_dataset *set); 00075 00076 00077 00078 /******************************************/ 00079 /* Static Implementation */ 00080 /******************************************/ 00081 00082 /** 00083 * @brief The "reset" function for a gnn_random_order. 00084 * @ingroup gnn_random_order_doc 00085 * 00086 * This function resets the \ref gnn_random_order. 00087 * 00088 * @param set A pointer to a \ref gnn_random_order. 00089 * @return 0 if succeeded. 00090 */ 00091 static int 00092 gnn_random_order_reset (gnn_dataset *set) 00093 { 00094 gnn_random_order *rset; 00095 00096 assert (set != NULL); 00097 00098 rset = (gnn_random_order *) set; 00099 00100 /* reset samplers */ 00101 gnn_dataset_reset (rset->data); 00102 gsl_ran_shuffle (gnn_get_rng (), rset->perm->data, 00103 gnn_dataset_get_size (set), sizeof(size_t)); 00104 00105 return 0; 00106 } 00107 00108 /** 00109 * @brief The "get" function for a simple set. 00110 * @ingroup gnn_random_order_doc 00111 * 00112 * This function is returns the k-th pattern in the random order sampler, 00113 * where the k-th pattern is drawn in a random order from the internal 00114 * \ref gnn_dataset. 00115 * 00116 * @param set A pointer to a \ref gnn_random_order. 00117 * @param k The index of the pattern to be retrieved. 00118 * @param x A reference for the input pattern. 00119 * @param t A reference for the output pattern. 00120 * @param p A pointer to a double for the pattern weight. 00121 * @return 0 if succeeded. 00122 */ 00123 static int 00124 gnn_random_order_get (gnn_dataset *set, 00125 size_t k, 00126 gsl_vector **x, 00127 gsl_vector **t, 00128 double *p) 00129 { 00130 gnn_random_order *rset; 00131 00132 assert (set != NULL); 00133 00134 /* get view as a random order sampler */ 00135 rset = (gnn_random_order *) set; 00136 00137 return gnn_dataset_get (rset->data, k, x, t, p); 00138 } 00139 00140 /** 00141 * @brief Destroy function. 00142 * @ingroup gnn_random_order_doc 00143 * 00144 * This is the \ref gnn_random_order destroy function. 00145 * @param set A pointer to a \ref gnn_random_order dataset. 00146 */ 00147 static void 00148 gnn_random_order_destroy (gnn_dataset *set) 00149 { 00150 gnn_random_order *rset; 00151 00152 assert (set != NULL); 00153 00154 /* get random order sampler view */ 00155 rset = (gnn_random_order *) set; 00156 00157 /* destroy internal dataset */ 00158 if (rset->data != NULL) 00159 gnn_dataset_destroy (rset->data); 00160 if (rset->perm != NULL) 00161 gsl_permutation_free (rset->perm); 00162 } 00163 00164 00165 00166 /******************************************/ 00167 /* Public Interface */ 00168 /******************************************/ 00169 00170 /** 00171 * @brief Builds a new random order sampler. 00172 * @ingroup gnn_random_order_doc 00173 * 00174 * This functions creates a new random order sampler from a given dataset. 00175 * It will create a new random sampling permutation (i.e. it will shuffle) 00176 * on each call on \ref gnn_dataset_reset, so that a sequential pattern 00177 * retrieval from pattern \f$i=0,\ldots,P\f$ will return them in a random 00178 * order. 00179 * 00180 * @param data The \ref gnn_dataset which should serve as pattern source. 00181 * @return Returns a pointer to a new \ref gnn_random_order dataset. 00182 */ 00183 gnn_dataset * 00184 gnn_random_order_new (gnn_dataset *data) 00185 { 00186 size_t size; 00187 size_t n; 00188 size_t m; 00189 size_t P; 00190 int status; 00191 gnn_dataset *set; 00192 gnn_random_order *rset; 00193 00194 assert (data != NULL); 00195 00196 /* get sizes */ 00197 P = gnn_dataset_get_size (data); 00198 n = gnn_dataset_input_get_size (data); 00199 m = gnn_dataset_output_get_size (data); 00200 00201 /* alloc memory for dataset */ 00202 rset = (gnn_random_order *) malloc (sizeof (gnn_random_order)); 00203 if (rset == NULL) 00204 { 00205 GSL_ERROR_VAL ("couldn't allocate memory for " 00206 "gnn_random_order structure", 00207 GSL_ENOMEM, NULL); 00208 } 00209 00210 /* get memory dataset view */ 00211 set = (gnn_dataset *) rset; 00212 00213 /* initialize */ 00214 status = gnn_dataset_init (set, 00215 P, 00216 n, 00217 m, 00218 gnn_random_order_reset, 00219 gnn_random_order_get, 00220 gnn_random_order_destroy); 00221 if (status) 00222 { 00223 GSL_ERROR_VAL ("could not initialize gnn_random_order", 00224 GSL_EINVAL, NULL); 00225 } 00226 00227 /* set fields */ 00228 rset->data = data; 00229 rset->perm = gsl_permutation_calloc (P); 00230 if (rset->perm == NULL) 00231 { 00232 GSL_ERROR_VAL ("couldn't allocate memory for the gnn_random_order's " 00233 "random permutation", GSL_ENOMEM, NULL); 00234 } 00235 gsl_permutation_init (rset->perm); 00236 gsl_ran_shuffle (gnn_get_rng (), rset->perm->data, P, sizeof(size_t)); 00237 00238 return set; 00239 } 00240 00241 00242 00243 /** 00244 * @brief Builds a new random sampler from gnn_inputs. 00245 * @ingroup gnn_random_order_doc 00246 * 00247 * This functions creates a new random order sampler from two or three input 00248 * samplers. The first two correspond to the input pattern and output pattern 00249 * samplers, wich should contain the same amount of patterns. The third is the 00250 * pattern weight sampler, which, if given, should also be of the same size. 00251 * If omitted (that means it is a \c NULL pointer), then all weights are 00252 * assumed to be 1. 00253 * 00254 * @note Internally, a \ref gnn_random_order sampler created this way 00255 * contains a \ref gnn_simple_set dataset. 00256 * 00257 * @param inputs The input sampler, a \ref gnn_input object. 00258 * @param outputs The target sampler, a \ref gnn_input object. 00259 * @param weights The weight sampler, a \ref gnn_input object. 00260 * @return Returns a pointer to a new \ref gnn_random_order dataset. 00261 */ 00262 gnn_dataset * 00263 gnn_random_order_from_inputs_new (gnn_input *inputs, 00264 gnn_input *outputs, 00265 gnn_input *weights) 00266 { 00267 return gnn_random_order_new (gnn_simple_set_new (inputs, outputs, weights)); 00268 } 00269 00270 00271 00272 /** 00273 * @brief Builds a new random order sampler from text files. 00274 * @ingroup gnn_random_order_doc 00275 * 00276 * This functions creates a new random order sampler from two or three text 00277 * files containing the patterns. 00278 * 00279 * The text files should be ASCII-formatted, and should contain matrices 00280 * where the number of columns corresponds to the size of the vectors, and 00281 * the row number to the number of patterns. Obviously, it is illegal to 00282 * provide matrices of different row sizes. Also, the weights file should 00283 * have <b>only one<b> column. 00284 * 00285 * If the weights file is omitted (i.e. it is a pointer to \c NULL), the 00286 * the weights are assumed to be 1. 00287 * 00288 * It is basically a shorthand for building the dataset from \ref gnn_input 00289 * structures which themselves should be built from text files. 00290 * 00291 * @param inputs The name of the file containing the input examples. 00292 * @param outputs The name of the file containing the output examples 00293 * (targets). 00294 * @param weights The name of the file containing the vector with the 00295 * weights for each pattern, or NULL if they should all be 1. 00296 * @return Returns a pointer to a new \ref gnn_random_order dataset. 00297 */ 00298 gnn_dataset * 00299 gnn_random_order_from_file_new (const char *inputsFile, 00300 const char *outputsFile, 00301 const char *weightsFile) 00302 { 00303 return gnn_random_order_new ( 00304 gnn_simple_set_from_files_new (inputsFile, outputsFile, weightsFile)); 00305 } 00306 00307 00308 00309 /** 00310 * @brief Returns a pointer to the internal gnn_dataset. 00311 * @ingroup gnn_random_order_doc 00312 * 00313 * This function returns a pointer to the internal \ref gnn_dataset structure, 00314 * which is used as source. 00315 * 00316 * @param set A pointer to a \ref gnn_random_order dataset. 00317 * @return Returns a pointer to the internal dataset. 00318 */ 00319 gnn_dataset * 00320 gnn_random_order_get_dataset (gnn_dataset *set) 00321 { 00322 gnn_random_order *rset; 00323 00324 assert (set != NULL); 00325 00326 rset = (gnn_random_order *) set; 00327 return rset->data; 00328 } 00329 00330
1.2.18