SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Kernel.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _KERNEL_H___
13 #define _KERNEL_H___
14 
15 #include <shogun/lib/common.h>
16 #include <shogun/lib/Signal.h>
17 #include <shogun/io/File.h>
20 #include <shogun/base/SGObject.h>
23 
24 #include <vector>
25 
26 namespace shogun
27 {
28  class CFile;
29  class CFeatures;
31 
32 #ifdef USE_SHORTREAL_KERNELCACHE
33 
35 #else
36 
38 #endif
39 
41 typedef int64_t KERNELCACHE_IDX;
42 
43 
46 {
49 };
50 
53 {
54  K_UNKNOWN = 0,
55  K_LINEAR = 10,
56  K_POLY = 20,
57  K_GAUSSIAN = 30,
61  K_SALZBERG = 41,
69  K_POLYMATCH = 100,
70  K_ALIGNMENT = 110,
75  K_COMBINED = 140,
76  K_AUC = 150,
77  K_CUSTOM = 160,
78  K_SIGMOID = 170,
79  K_CHI2 = 180,
80  K_DIAG = 190,
81  K_CONST = 200,
82  K_DISTANCE = 220,
85  K_OLIGO = 250,
86  K_MATCHWORD = 260,
87  K_TPPK = 270,
91  K_WAVELET = 310,
92  K_WAVE = 320,
93  K_CAUCHY = 330,
94  K_TSTUDENT = 340,
98  K_SPHERICAL = 380,
99  K_SPLINE = 390,
100  K_ANOVA = 400,
101  K_POWER = 410,
102  K_LOG = 420,
103  K_CIRCULAR = 430,
106  K_BESSEL = 460,
107 };
108 
/** Kernel property flags.
 *
 * The values are distinct bits so they can be OR-ed together into a
 * property mask and tested with a bitwise AND.
 */
enum EKernelProperty
{
	KP_NONE = 0,
	KP_LINADD = 1,			// Kernels that can be optimized via doing normal updates w + dw
	KP_KERNCOMBINATION = 2,	// Kernels that are in fact a linear combination of subkernels K=\sum_i b_i*K_i
	KP_BATCHEVALUATION = 4	// Kernels that can generate normals on the fly in linadd and process batches more quickly/memory-efficiently than single examples
};
117 
118 #ifndef DOXYGEN_SHOULD_SKIP_THIS
119 
120 template <class T> struct K_THREAD_PARAM
121 {
123  CKernel* kernel;
125  int32_t start;
127  int32_t end;
129  int32_t total_start;
131  int32_t total_end;
133  int32_t m;
135  int32_t n;
137  T* result;
139  bool symmetric;
141  bool verbose;
142 };
143 #endif
144 
145 class CSVM;
146 
172 class CKernel : public CSGObject
173 {
184  friend class CDiceKernelNormalizer;
186 
187  public:
188 
192  CKernel();
193 
194 
199  CKernel(int32_t size);
200 
207  CKernel(CFeatures* l, CFeatures* r, int32_t size);
208 
209  virtual ~CKernel();
210 
218  inline float64_t kernel(int32_t idx_a, int32_t idx_b)
219  {
220  if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs)
221  {
222  SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
223  idx_a,num_lhs, idx_b,num_rhs);
224  }
225 
226  return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
227  }
228 
234  {
235  return get_kernel_matrix<float64_t>();
236  }
237 
244  {
245 
247 
248  for (int32_t i=0; i!=num_rhs; i++)
249  col[i] = kernel(i,j);
250 
251  return col;
252  }
253 
254 
261  {
263 
264  for (int32_t j=0; j!=num_lhs; j++)
265  row[j] = kernel(i,j);
266 
267  return row;
268  }
269 
274  template <class T>
276  {
277  T* result = NULL;
278 
279  if (!has_features())
280  SG_ERROR( "no features assigned to kernel\n");
281 
282  int32_t m=get_num_vec_lhs();
283  int32_t n=get_num_vec_rhs();
284 
285  int64_t total_num = int64_t(m)*n;
286 
287  // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
288  bool symmetric= (lhs && lhs==rhs && m==n);
289 
290  SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
291 
292  result=SG_MALLOC(T, total_num);
293 
294  int32_t num_threads=parallel->get_num_threads();
295  if (num_threads < 2)
296  {
297  K_THREAD_PARAM<T> params;
298  params.kernel=this;
299  params.result=result;
300  params.start=0;
301  params.end=m;
302  params.total_start=0;
303  params.total_end=total_num;
304  params.n=n;
305  params.m=m;
306  params.symmetric=symmetric;
307  params.verbose=true;
308  get_kernel_matrix_helper<T>((void*) &params);
309  }
310  else
311  {
312  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
313  K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
314  int64_t step= total_num/num_threads;
315 
316  int32_t t;
317 
318  num_threads--;
319  for (t=0; t<num_threads; t++)
320  {
321  params[t].kernel = this;
322  params[t].result = result;
323  params[t].start = compute_row_start(t*step, n, symmetric);
324  params[t].end = compute_row_start((t+1)*step, n, symmetric);
325  params[t].total_start=t*step;
326  params[t].total_end=(t+1)*step;
327  params[t].n=n;
328  params[t].m=m;
329  params[t].symmetric=symmetric;
330  params[t].verbose=false;
331 
332  int code=pthread_create(&threads[t], NULL,
333  CKernel::get_kernel_matrix_helper<T>, (void*)&params[t]);
334 
335  if (code != 0)
336  {
337  SG_WARNING("Thread creation failed (thread %d of %d) "
338  "with error:'%s'\n",t, num_threads, strerror(code));
339  num_threads=t;
340  break;
341  }
342  }
343 
344  params[t].kernel = this;
345  params[t].result = result;
346  params[t].start = compute_row_start(t*step, n, symmetric);
347  params[t].end = m;
348  params[t].total_start=t*step;
349  params[t].total_end=total_num;
350  params[t].n=n;
351  params[t].m=m;
352  params[t].symmetric=symmetric;
353  params[t].verbose=true;
354  get_kernel_matrix_helper<T>(&params[t]);
355 
356  for (t=0; t<num_threads; t++)
357  {
358  if (pthread_join(threads[t], NULL) != 0)
359  SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
360  }
361 
362  SG_FREE(params);
363  SG_FREE(threads);
364  }
365 
366  SG_DONE();
367 
368  return SGMatrix<T>(result,m,n,true);
369  }
370 
371 
382  virtual bool init(CFeatures* lhs, CFeatures* rhs);
383 
389 
395 
399  virtual bool init_normalizer();
400 
407  virtual void cleanup();
408 
413  void load(CFile* loader);
414 
419  void save(CFile* writer);
420 
425  inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
426 
431  inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
432 
437  virtual inline int32_t get_num_vec_lhs()
438  {
439  return num_lhs;
440  }
441 
446  virtual inline int32_t get_num_vec_rhs()
447  {
448  return num_rhs;
449  }
450 
455  virtual inline bool has_features()
456  {
457  return lhs && rhs;
458  }
459 
464  inline bool get_lhs_equals_rhs()
465  {
466  return lhs_equals_rhs;
467  }
468 
470  virtual void remove_lhs_and_rhs();
471 
473  virtual void remove_lhs();
474 
476  virtual void remove_rhs();
477 
485  virtual EKernelType get_kernel_type()=0 ;
486 
493  virtual EFeatureType get_feature_type()=0;
494 
501  virtual EFeatureClass get_feature_class()=0;
502 
507  inline void set_cache_size(int32_t size)
508  {
509  cache_size = size;
510 
511  }
512 
517  inline int32_t get_cache_size() { return cache_size; }
518 
519 
520 
522  void list_kernel();
523 
529  inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
530 
534  virtual void clear_normal();
535 
541  virtual void add_to_normal(int32_t vector_idx, float64_t weight);
542 
548 
553  virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
554 
560 
568  virtual bool init_optimization(
569  int32_t count, int32_t *IDX, float64_t *weights);
570 
575  virtual bool delete_optimization();
576 
582  bool init_optimization_svm(CSVM * svm) ;
583 
589  virtual float64_t compute_optimized(int32_t vector_idx);
590 
599  virtual void compute_batch(
600  int32_t num_vec, int32_t* vec_idx, float64_t* target,
601  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
602  float64_t factor=1.0);
603 
609 
615 
620  virtual int32_t get_num_subkernels();
621 
627  virtual void compute_by_subkernel(
628  int32_t vector_idx, float64_t * subkernel_contrib);
629 
635  virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
636 
641  virtual void set_subkernel_weights(SGVector<float64_t> weights);
642 
643  protected:
649  {
650  properties |= p;
651  }
652 
658  {
659  properties &= (properties | p) ^ p;
660  }
661 
666  inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
667 
678  virtual float64_t compute(int32_t x, int32_t y)=0;
679 
686  int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
687  {
688  int32_t i_start;
689 
690  if (symmetric)
691  i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
692  else
693  i_start=(int32_t) (offs/int64_t(n));
694 
695  return i_start;
696  }
697 
702  template <class T>
703  static void* get_kernel_matrix_helper(void* p)
704  {
705  K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
706  int32_t i_start=params->start;
707  int32_t i_end=params->end;
708  CKernel* k=params->kernel;
709  T* result=params->result;
710  bool symmetric=params->symmetric;
711  int32_t n=params->n;
712  int32_t m=params->m;
713  bool verbose=params->verbose;
714  int64_t total_start=params->total_start;
715  int64_t total_end=params->total_end;
716  int64_t total=total_start;
717 
718  for (int32_t i=i_start; i<i_end; i++)
719  {
720  int32_t j_start=0;
721 
722  if (symmetric)
723  j_start=i;
724 
725  for (int32_t j=j_start; j<n; j++)
726  {
727  float64_t v=k->kernel(i,j);
728  result[i+j*m]=v;
729 
730  if (symmetric && i!=j)
731  result[j+i*m]=v;
732 
733  if (verbose)
734  {
735  total++;
736 
737  if (symmetric && i!=j)
738  total++;
739 
740  if (total%100 == 0)
741  k->SG_PROGRESS(total, total_start, total_end);
742 
744  break;
745  }
746  }
747 
748  }
749 
750  return NULL;
751  }
752 
761  virtual void load_serializable_post() throw (ShogunException);
762 
771  virtual void save_serializable_pre() throw (ShogunException);
772 
781  virtual void save_serializable_post() throw (ShogunException);
786  virtual void register_params();
787 
788  private:
791  void init();
792 
793 
794 
796 
797  protected:
799  int32_t cache_size;
800 
801 
802 
806 
811 
814 
816  int32_t num_lhs;
818  int32_t num_rhs;
819 
822 
829 
831  uint64_t properties;
832 
836 };
837 
838 }
839 #endif /* _KERNEL_H___ */
virtual void clear_normal()
Definition: Kernel.cpp:335
virtual void load_serializable_post()
Definition: Kernel.cpp:384
int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
Definition: Kernel.h:686
The MultitaskKernel allows Multitask Learning via a modified kernel function.
virtual void cleanup()
Definition: Kernel.cpp:138
#define SG_DONE()
Definition: SGIO.h:87
virtual void compute_by_subkernel(int32_t vector_idx, float64_t *subkernel_contrib)
Definition: Kernel.cpp:345
EKernelType
Definition: Kernel.h:52
int32_t get_num_threads() const
Definition: Parallel.cpp:58
virtual float64_t compute(int32_t x, int32_t y)=0
DiceKernelNormalizer performs kernel normalization inspired by the Dice coefficient (see http://en...
The MultitaskKernel allows Multitask Learning via a modified kernel function.
SGMatrix< T > get_kernel_matrix()
Definition: Kernel.h:275
int32_t num_rhs
number of feature vectors on right hand side
Definition: Kernel.h:818
Class ShogunException defines an exception which is thrown whenever an error inside of shogun occurs...
virtual bool set_normalizer(CKernelNormalizer *normalizer)
Definition: Kernel.cpp:115
static T sq(T x)
x^2
Definition: Math.h:277
bool get_lhs_equals_rhs()
Definition: Kernel.h:464
CFeatures * get_rhs()
Definition: Kernel.h:431
#define SG_ERROR(...)
Definition: SGIO.h:75
void set_is_initialized(bool p_init)
Definition: Kernel.h:666
virtual bool delete_optimization()
Definition: Kernel.cpp:311
int64_t KERNELCACHE_IDX
Definition: Kernel.h:41
void set_cache_size(int32_t size)
Definition: Kernel.h:507
float64_t kernel(int32_t idx_a, int32_t idx_b)
Definition: Kernel.h:218
virtual void set_optimization_type(EOptimizationType t)
Definition: Kernel.h:553
uint64_t properties
Definition: Kernel.h:831
Parallel * parallel
Definition: SGObject.h:291
virtual void remove_rhs()
takes all necessary steps if the rhs is removed from kernel
Definition: Kernel.cpp:187
TanimotoKernelNormalizer performs kernel normalization inspired by the Tanimoto coefficient (see http...
virtual int32_t get_num_vec_lhs()
Definition: Kernel.h:437
#define SG_REF(x)
Definition: SGObject.h:44
static float64_t floor(float64_t d)
Definition: Math.h:202
int32_t cache_size
cache_size in MB
Definition: Kernel.h:799
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:35
bool get_is_initialized()
Definition: Kernel.h:559
float64_t combined_kernel_weight
Definition: Kernel.h:821
virtual void register_params()
Definition: Kernel.cpp:407
void save(CFile *writer)
Definition: Kernel.cpp:151
virtual SGVector< float64_t > get_kernel_col(int32_t j)
Definition: Kernel.h:243
virtual void remove_lhs_and_rhs()
Definition: Kernel.cpp:160
bool has_property(EKernelProperty p)
Definition: Kernel.h:529
virtual CKernelNormalizer * get_normalizer()
Definition: Kernel.cpp:127
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:76
EKernelProperty
Definition: Kernel.h:110
double float64_t
Definition: common.h:56
SGMatrix< float64_t > get_kernel_matrix()
Definition: Kernel.h:233
virtual EFeatureType get_feature_type()=0
void set_combined_kernel_weight(float64_t nw)
Definition: Kernel.h:614
KERNELCACHE_ELEM * kernel_matrix
Definition: Kernel.h:805
A File access base class.
Definition: File.h:33
virtual void save_serializable_post()
Definition: Kernel.cpp:399
virtual float64_t compute_optimized(int32_t vector_idx)
Definition: Kernel.cpp:317
EOptimizationType get_optimization_type()
Definition: Kernel.h:547
void unset_property(EKernelProperty p)
Definition: Kernel.h:657
void list_kernel()
Definition: Kernel.cpp:200
float64_t get_combined_kernel_weight()
Definition: Kernel.h:608
The MultitaskKernel allows Multitask Learning via a modified kernel function.
virtual const float64_t * get_subkernel_weights(int32_t &num_weights)
Definition: Kernel.cpp:351
Normalize the kernel by a constant obtained from the first element of the kernel matrix, i.e. $c = K_{0,0}$.
Normalize the kernel by adding a constant term to its diagonal. This aids kernels to become positive ...
int32_t num_lhs
number of feature vectors on left hand side
Definition: Kernel.h:816
The class Kernel Normalizer defines a function to post-process kernel values.
static bool cancel_computations()
Definition: Signal.h:85
ZeroMeanCenterKernelNormalizer centers the kernel in feature space.
virtual int32_t get_num_vec_rhs()
Definition: Kernel.h:446
#define SG_FREE(ptr)
Definition: memory.h:39
virtual void set_subkernel_weights(SGVector< float64_t > weights)
Definition: Kernel.cpp:357
virtual bool init_normalizer()
Definition: Kernel.cpp:133
bool optimization_initialized
Definition: Kernel.h:824
float float32_t
Definition: common.h:55
EFeatureType
shogun feature type
Definition: FeatureTypes.h:16
EOptimizationType opt_type
Definition: Kernel.h:828
void load(CFile *loader)
Definition: Kernel.cpp:145
CFeatures * rhs
feature vectors to occur on right hand side
Definition: Kernel.h:810
Base-class for parameterized Kernel Normalizers.
#define SG_DEBUG(...)
Definition: SGIO.h:72
SqrtDiagKernelNormalizer divides by the Square Root of the product of the diagonal elements...
virtual void compute_batch(int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0)
Definition: Kernel.cpp:323
EOptimizationType
Definition: Kernel.h:45
bool lhs_equals_rhs
lhs
Definition: Kernel.h:813
Normalize the kernel by either a constant or the average value of the diagonal elements (depending on...
virtual EKernelType get_kernel_type()=0
virtual bool init_optimization(int32_t count, int32_t *IDX, float64_t *weights)
Definition: Kernel.cpp:304
CFeatures * lhs
feature vectors to occur on left hand side
Definition: Kernel.h:808
The class Features is the base class of all feature objects.
Definition: Features.h:56
virtual void save_serializable_pre()
Definition: Kernel.cpp:391
static void * get_kernel_matrix_helper(void *p)
Definition: Kernel.h:703
virtual void remove_lhs()
Definition: Kernel.cpp:175
virtual int32_t get_num_subkernels()
Definition: Kernel.cpp:340
bool init_optimization_svm(CSVM *svm)
Definition: Kernel.cpp:366
A generic Support Vector Machine Interface.
Definition: SVM.h:46
The Kernel base class.
Definition: Kernel.h:172
int32_t get_cache_size()
Definition: Kernel.h:517
CKernelNormalizer * normalizer
Definition: Kernel.h:835
virtual SGVector< float64_t > get_kernel_row(int32_t i)
Definition: Kernel.h:260
#define SG_WARNING(...)
Definition: SGIO.h:74
virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs)=0
static float32_t sqrt(float32_t x)
x^0.5
Definition: Math.h:283
virtual bool has_features()
Definition: Kernel.h:455
virtual ~CKernel()
Definition: Kernel.cpp:70
virtual void add_to_normal(int32_t vector_idx, float64_t weight)
Definition: Kernel.cpp:330
#define SG_MALLOC(type, len)
Definition: memory.h:36
float64_t KERNELCACHE_ELEM
Definition: Kernel.h:30
void set_property(EKernelProperty p)
Definition: Kernel.h:648
VarianceKernelNormalizer divides by the ``variance''.
virtual EFeatureClass get_feature_class()=0
CFeatures * get_lhs()
Definition: Kernel.h:425

SHOGUN Machine Learning Toolbox - Documentation