SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Features.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Subset support written (W) 2011 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
15 #include <shogun/io/SGIO.h>
16 #include <shogun/base/Parameter.h>
17 
18 #include <string.h>
19 
20 using namespace shogun;
21 
22 CFeatures::CFeatures(int32_t size)
23 : CSGObject()
24 {
25  init();
26  cache_size = size;
27 }
28 
30 : CSGObject(orig)
31 {
32  init();
33 
34  preproc = orig.preproc;
35  num_preproc = orig.num_preproc;
36 
37  preprocessed=SG_MALLOC(bool, orig.num_preproc);
38  memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc);
39 }
40 
42 : CSGObject()
43 {
44  init();
45 
46  load(loader);
47  SG_INFO("Feature object loaded (%p)\n",this) ;
48 }
49 
51 {
53  delete m_subset;
54 }
55 
56 void
57 CFeatures::init()
58 {
59  m_parameters->add(&properties, "properties",
60  "Feature properties.");
61  m_parameters->add(&cache_size, "cache_size",
62  "Size of cache in MB.");
63 
64  m_parameters->add_vector((CSGObject***) &preproc,
65  &num_preproc, "preproc",
66  "List of preprocessors.");
67  m_parameters->add_vector(&preprocessed,
68  &num_preproc, "preprocessed",
69  "Feature[i] is already preprocessed.");
70 
71  m_parameters->add((CSGObject**)&m_subset, "subset", "Subset object");
72 
73  m_subset=NULL;
74  properties = FP_NONE;
75  cache_size = 0;
76  preproc = NULL;
77  num_preproc = 0;
78  preprocessed = NULL;
79 }
80 
83 {
84  SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc);
85  ASSERT(p);
86 
87  bool* preprocd=SG_MALLOC(bool, num_preproc+1);
88  CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1);
89  for (int32_t i=0; i<num_preproc; i++)
90  {
91  pps[i]=preproc[i];
92  preprocd[i]=preprocessed[i];
93  }
94  SG_FREE(preproc);
95  SG_FREE(preprocessed);
96  preproc=pps;
97  preprocessed=preprocd;
98  preproc[num_preproc]=p;
99  preprocessed[num_preproc]=false;
100 
101  num_preproc++;
102 
103  for (int32_t i=0; i<num_preproc; i++)
104  SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ;
105 
106  SG_REF(p);
107 
108  return num_preproc;
109 }
110 
113 {
114  if (num<num_preproc)
115  {
116  SG_REF(preproc[num]);
117  return preproc[num];
118  }
119  else
120  return NULL;
121 }
122 
125 {
126  int32_t num=0;
127 
128  for (int32_t i=0; i<num_preproc; i++)
129  {
130  if (preprocessed[i])
131  num++;
132  }
133 
134  return num;
135 }
136 
139 {
140  while (del_preprocessor(0));
141 }
142 
145 {
146  CPreprocessor** pps=NULL;
147  bool* preprocd=NULL;
148  CPreprocessor* removed_preproc=NULL;
149 
150  if (num_preproc>0 && num<num_preproc)
151  {
152  removed_preproc=preproc[num];
153 
154  if (num_preproc>1)
155  {
156  pps= SG_MALLOC(CPreprocessor*, num_preproc-1);
157  preprocd= SG_MALLOC(bool, num_preproc-1);
158 
159  if (pps && preprocd)
160  {
161  int32_t j=0;
162  for (int32_t i=0; i<num_preproc; i++)
163  {
164  if (i!=num)
165  {
166  pps[j]=preproc[i];
167  preprocd[j]=preprocessed[i];
168  j++;
169  }
170  }
171  }
172  }
173 
174  SG_FREE(preproc);
175  preproc=pps;
176  SG_FREE(preprocessed);
177  preprocessed=preprocd;
178 
179  num_preproc--;
180 
181  for (int32_t i=0; i<num_preproc; i++)
182  SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ;
183  }
184 
185  SG_UNREF(removed_preproc);
186  return removed_preproc;
187 }
188 
190 {
191  preprocessed[num]=true;
192 }
193 
194 bool CFeatures::is_preprocessed(int32_t num)
195 {
196  return preprocessed[num];
197 }
198 
200 {
201  return num_preproc;
202 }
203 
205 {
206  return cache_size;
207 }
208 
209 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors)
210 {
212  return false;
213 }
214 
216 {
217  SG_INFO( "%p - ", this);
218  switch (get_feature_class())
219  {
220  case C_UNKNOWN:
221  SG_INFO( "C_UNKNOWN ");
222  break;
223  case C_SIMPLE:
224  SG_INFO( "C_SIMPLE ");
225  break;
226  case C_SPARSE:
227  SG_INFO( "C_SPARSE ");
228  break;
229  case C_STRING:
230  SG_INFO( "C_STRING ");
231  break;
232  case C_COMBINED:
233  SG_INFO( "C_COMBINED ");
234  break;
235  case C_COMBINED_DOT:
236  SG_INFO( "C_COMBINED_DOT ");
237  break;
238  case C_WD:
239  SG_INFO( "C_WD ");
240  break;
241  case C_SPEC:
242  SG_INFO( "C_SPEC ");
243  break;
244  case C_WEIGHTEDSPEC:
245  SG_INFO( "C_WEIGHTEDSPEC ");
246  break;
247  case C_STREAMING_SIMPLE:
248  SG_INFO( "C_STREAMING_SIMPLE ");
249  break;
250  case C_STREAMING_SPARSE:
251  SG_INFO( "C_STREAMING_SPARSE ");
252  break;
253  case C_STREAMING_STRING:
254  SG_INFO( "C_STREAMING_STRING ");
255  break;
256  case C_STREAMING_VW:
257  SG_INFO( "C_STREAMING_VW ");
258  break;
259  case C_ANY:
260  SG_INFO( "C_ANY ");
261  break;
262  default:
263  SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
264  }
265 
266  switch (get_feature_type())
267  {
268  case F_UNKNOWN:
269  SG_INFO( "F_UNKNOWN \n");
270  break;
271  case F_CHAR:
272  SG_INFO( "F_CHAR \n");
273  break;
274  case F_BYTE:
275  SG_INFO( "F_BYTE \n");
276  break;
277  case F_SHORT:
278  SG_INFO( "F_SHORT \n");
279  break;
280  case F_WORD:
281  SG_INFO( "F_WORD \n");
282  break;
283  case F_INT:
284  SG_INFO( "F_INT \n");
285  break;
286  case F_UINT:
287  SG_INFO( "F_UINT \n");
288  break;
289  case F_LONG:
290  SG_INFO( "F_LONG \n");
291  break;
292  case F_ULONG:
293  SG_INFO( "F_ULONG \n");
294  break;
295  case F_SHORTREAL:
296  SG_INFO( "F_SHORTEAL \n");
297  break;
298  case F_DREAL:
299  SG_INFO( "F_DREAL \n");
300  break;
301  case F_LONGREAL:
302  SG_INFO( "F_LONGREAL \n");
303  break;
304  case F_ANY:
305  SG_INFO( "F_ANY \n");
306  break;
307  default:
308  SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n");
309  }
310 }
311 
312 
313 void CFeatures::load(CFile* loader)
314 {
318 }
319 
320 void CFeatures::save(CFile* writer)
321 {
325 }
326 
328 {
329  bool result=false;
330 
331  if (f)
332  result= ( (this->get_feature_class() == f->get_feature_class()) &&
333  (this->get_feature_type() == f->get_feature_type()));
334  return result;
335 }
336 
338 {
339  return (properties & p) != 0;
340 }
341 
343 {
344  properties |= p;
345 }
346 
348 {
349  properties &= (properties | p) ^ p;
350 }
351 
353 {
355  m_subset=subset;
356  SG_REF(subset);
358 }
359 
361 {
362  return m_subset ? m_subset->subset_idx_conversion(idx) : idx;
363 }
364 
366 {
367  return m_subset!=NULL;
368 }
369 
371 {
372  set_subset(NULL);
373 }
374 
376 {
377  SG_ERROR("copy_subset and therefore model storage of CMachine "
378  "(required for cross-validation and model-selection is ",
379  "not yet implemented for feature type %s\n", get_name());
380  return NULL;
381 }
virtual const char * get_name() const =0
bool has_property(EFeatureProperty p)
Definition: Features.cpp:337
class for adding subset support to a class. Provides an interface for getting/setting subset_matrices...
Definition: Subset.h:24
virtual void save(CFile *writer)
Definition: Features.cpp:320
#define SG_INFO(...)
Definition: SGIO.h:73
#define SG_RESET_LOCALE
Definition: SGIO.h:67
bool is_preprocessed(int32_t num)
Definition: Features.cpp:194
virtual bool reshape(int32_t num_features, int32_t num_vectors)
Definition: Features.cpp:209
bool check_feature_compatibility(CFeatures *f)
Definition: Features.cpp:327
virtual EFeatureType get_feature_type()=0
void set_preprocessed(int32_t num)
Definition: Features.cpp:189
void list_feature_obj()
Definition: Features.cpp:215
virtual CPreprocessor * del_preprocessor(int32_t num)
delete the preprocessor at index num
Definition: Features.cpp:144
CSubset * m_subset
Definition: Features.h:278
CFeatures(int32_t size=0)
Definition: Features.cpp:22
void set_property(EFeatureProperty p)
Definition: Features.cpp:342
#define SG_ERROR(...)
Definition: SGIO.h:75
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:82
Parameter * m_parameters
Definition: SGObject.h:297
#define SG_REF(x)
Definition: SGObject.h:44
#define SG_SET_LOCALE_C
Definition: SGIO.h:66
int32_t get_num_preprocessors() const
Definition: Features.cpp:199
virtual void subset_changed_post()
Definition: Features.h:234
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:23
#define ASSERT(x)
Definition: SGIO.h:102
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:76
virtual EFeatureClass get_feature_class()=0
void unset_property(EFeatureProperty p)
Definition: Features.cpp:347
A File access base class.
Definition: File.h:33
virtual void load(CFile *loader)
Definition: Features.cpp:313
#define SG_FREE(ptr)
Definition: memory.h:39
virtual int32_t add_preprocessor(CPreprocessor *p)
add a preprocessor (appended to the end of the preprocessor list)
Definition: Features.cpp:82
#define SG_UNREF(x)
Definition: SGObject.h:45
void add_vector(bool **param, index_t *length, const char *name, const char *description="")
Definition: Parameter.cpp:306
virtual void remove_subset()
Definition: Features.cpp:370
virtual CFeatures * copy_subset(SGVector< index_t > indices)
Definition: Features.cpp:375
int32_t get_num_preprocessed()
get the number of preprocessors that were already applied
Definition: Features.cpp:124
CPreprocessor * get_preprocessor(int32_t num)
get the preprocessor at index num (NULL if out of range)
Definition: Features.cpp:112
The class Features is the base class of all feature objects.
Definition: Features.h:56
index_t subset_idx_conversion(index_t idx) const
Definition: Subset.h:55
Class Preprocessor defines a preprocessor interface.
Definition: Preprocessor.h:74
index_t subset_idx_conversion(index_t idx) const
Definition: Features.cpp:360
virtual ~CFeatures()
Definition: Features.cpp:50
void clean_preprocessors()
clears all preprocs
Definition: Features.cpp:138
int32_t get_cache_size()
Definition: Features.cpp:204
int32_t index_t
Definition: DataType.h:25
virtual void set_subset(CSubset *subset)
Definition: Features.cpp:352
bool has_subset() const
Definition: Features.cpp:365
EFeatureProperty
shogun feature properties
Definition: FeatureTypes.h:55
#define SG_MALLOC(type, len)
Definition: memory.h:36

SHOGUN Machine Learning Toolbox - Documentation