SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SparseFeatures.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Subset support written (W) 2011 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2010 Berlin Institute of Technology
12  */
13 
14 #ifndef _SPARSEFEATURES__H__
15 #define _SPARSEFEATURES__H__
16 
17 #include <shogun/lib/common.h>
18 #include <shogun/lib/Cache.h>
19 #include <shogun/io/File.h>
20 
21 #include <shogun/features/Labels.h>
25 
26 namespace shogun
27 {
28 
29 class CFile;
30 class CLabels;
31 class CFeatures;
32 class CDotFeatures;
33 template <class ST> class CSimpleFeatures;
34 
52 template <class ST> class CSparseFeatures : public CDotFeatures
53 {
54  public:
59  CSparseFeatures(int32_t size=0);
60 
69  CSparseFeatures(SGSparseVector<ST>* src,
70  int32_t num_feat, int32_t num_vec,bool copy=false);
71 
77  CSparseFeatures(SGSparseMatrix<ST> sparse);
78 
84  CSparseFeatures(SGMatrix<ST> dense);
85 
87  CSparseFeatures(const CSparseFeatures & orig);
88 
93  CSparseFeatures(CFile* loader);
94 
96  virtual ~CSparseFeatures();
97 
103 
108  void free_sparse_features();
109 
114  virtual CFeatures* duplicate() const;
115 
125  ST get_feature(int32_t num, int32_t index);
126 
135  ST* get_full_feature_vector(int32_t num, int32_t& len);
136 
142  SGVector<ST> get_full_feature_vector(int32_t num);
143 
149  virtual int32_t get_nnz_features_for_vector(int32_t num);
150 
160  SGSparseVector<ST> get_sparse_feature_vector(int32_t num);
161 
172  static ST sparse_dot(ST alpha, SGSparseVectorEntry<ST>* avec, int32_t alen,
173  SGSparseVectorEntry<ST>* bvec, int32_t blen);
174 
187  ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
188 
200  void add_to_dense_vec(float64_t alpha, int32_t num,
201  float64_t* vec, int32_t dim, bool abs_val=false);
202 
210  void free_sparse_feature_vector(SGSparseVector<ST> vec, int32_t num);
211 
221  SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
222 
230  SGSparseMatrix<ST> get_sparse_feature_matrix();
231 
237  static void clean_tsparse(SGSparseVector<ST>* sfm, int32_t num_vec);
238 
245  CSparseFeatures<ST>* get_transposed();
246 
258  SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
259 
267  void set_sparse_feature_matrix(SGSparseMatrix<ST> sm);
268 
275  SGMatrix<ST> get_full_feature_matrix();
276 
286  virtual bool set_full_feature_matrix(SGMatrix<ST> full);
287 
295  virtual bool apply_preprocessor(bool force_preprocessing=false);
296 
301  virtual int32_t get_size();
302 
310  bool obtain_from_simple(CSimpleFeatures<ST>* sf);
311 
316  virtual int32_t get_num_vectors() const;
317 
322  int32_t get_num_features();
323 
335  int32_t set_num_features(int32_t num);
336 
341  virtual EFeatureClass get_feature_class();
342 
347  virtual EFeatureType get_feature_type();
348 
356  void free_feature_vector(SGSparseVector<ST> vec, int32_t num);
357 
362  int64_t get_num_nonzero_entries();
363 
371  float64_t* compute_squared(float64_t* sq);
372 
387  float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs,
388  float64_t* sq_lhs, int32_t idx_a,
389  CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs,
390  int32_t idx_b);
391 
398  void load(CFile* loader);
399 
406  void save(CFile* writer);
407 
417  CLabels* load_svmlight_file(char* fname, bool do_sort_features=true);
418 
424  void sort_features();
425 
434  bool write_svmlight_file(char* fname, CLabels* label);
435 
443  virtual int32_t get_dim_feature_space() const;
444 
454  virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
455 
464  virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
465 
466  #ifndef DOXYGEN_SHOULD_SKIP_THIS
467 
/**
 * Plain struct bundling one sparse vector with an integer position.
 * NOTE(review): presumably this is the object behind the void* handle
 * returned by get_feature_iterator() and consumed by get_next_feature() /
 * free_feature_iterator() -- confirm in the implementation file.
 */
468  struct sparse_feature_iterator
469  {
 /** sparse vector held by this iterator */
471  SGSparseVector<ST> sv;
472 
 /** integer cursor; presumably the position of the next entry to
  *  visit inside sv.features -- TODO confirm against get_next_feature() */
474  int32_t index;
475 
 /** Debug helper: emits sv.features (as a pointer), sv.vec_index,
  *  sv.num_feat_entries and index through the SG_SPRINT macro. */
477  void print_info()
478  {
479  SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
480  sv.features, sv.vec_index, sv.num_feat_entries, index);
481  }
482  };
483  #endif
484 
496  virtual void* get_feature_iterator(int32_t vector_index);
497 
508  virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
509 
515  virtual void free_feature_iterator(void* iterator);
516 
523  virtual CFeatures* copy_subset(SGVector<index_t> indices);
524 
526  inline virtual const char* get_name() const { return "SparseFeatures"; }
527 
528  protected:
539  virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num,
540  int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
541 
542  private:
543  void init();
544 
545  protected:
546 
548  int32_t num_vectors;
549 
551  int32_t num_features;
552 
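554  SGSparseVector<ST>* sparse_feature_matrix;
555 
557  CCache< SGSparseVectorEntry<ST> >* feature_cache;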
558 };
559 }
560 #endif /* _SPARSEFEATURES__H__ */
void save(CFile *writer)
virtual bool get_next_feature(int32_t &index, float64_t &value, void *iterator)
CCache< SGSparseVectorEntry< ST > > * feature_cache
virtual CFeatures * duplicate() const
void free_feature_vector(SGSparseVector< ST > vec, int32_t num)
static ST sparse_dot(ST alpha, SGSparseVectorEntry< ST > *avec, int32_t alen, SGSparseVectorEntry< ST > *bvec, int32_t blen)
CLabels * load_svmlight_file(char *fname, bool do_sort_features=true)
CDotFeatures(int32_t size=0)
Definition: DotFeatures.cpp:42
CFeatures(int32_t size=0)
Definition: Features.cpp:22
int32_t num_vectors
total number of vectors
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
void set_sparse_feature_matrix(SGSparseMatrix< ST > sm)
ST * get_full_feature_vector(int32_t num, int32_t &len)
virtual EFeatureType get_feature_type()
float64_t compute_squared_norm(CSparseFeatures< float64_t > *lhs, float64_t *sq_lhs, int32_t idx_a, CSparseFeatures< float64_t > *rhs, float64_t *sq_rhs, int32_t idx_b)
virtual void free_feature_iterator(void *iterator)
virtual int32_t get_nnz_features_for_vector(int32_t num)
virtual int32_t get_dim_feature_space() const
ST dense_dot(ST alpha, int32_t num, ST *vec, int32_t dim, ST b)
bool obtain_from_simple(CSimpleFeatures< ST > *sf)
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:35
template class SGSparseVector
Definition: File.h:22
SGSparseMatrix< ST > get_sparse_feature_matrix()
virtual int32_t get_num_vectors() const
#define SG_SPRINT(...)
Definition: SGIO.h:95
virtual void * get_feature_iterator(int32_t vector_index)
CSparseFeatures< ST > * get_transposed()
virtual SGSparseVectorEntry< ST > * compute_sparse_feature_vector(int32_t num, int32_t &len, SGSparseVectorEntry< ST > *target=NULL)
double float64_t
Definition: common.h:56
SGMatrix< ST > get_full_feature_matrix()
void load(CFile *loader)
virtual bool set_full_feature_matrix(SGMatrix< ST > full)
SGSparseVector< ST > * sparse_feature_matrix
array of sparse vectors of size num_vectors
virtual EFeatureClass get_feature_class()
bool write_svmlight_file(char *fname, CLabels *label)
void add_to_dense_vec(float64_t alpha, int32_t num, float64_t *vec, int32_t dim, bool abs_val=false)
EFeatureType
shogun feature type
Definition: FeatureTypes.h:16
virtual bool apply_preprocessor(bool force_preprocessing=false)
float64_t * compute_squared(float64_t *sq)
ST get_feature(int32_t num, int32_t index)
int32_t set_num_features(int32_t num)
Template class Cache implements a simple cache.
Definition: Cache.h:31
virtual CFeatures * copy_subset(SGVector< index_t > indices)
virtual const char * get_name() const
void free_sparse_feature_vector(SGSparseVector< ST > vec, int32_t num)
int32_t num_features
total number of features
SGSparseVector< ST > get_sparse_feature_vector(int32_t num)
CSparseFeatures(int32_t size=0)
virtual int32_t get_size()
static void clean_tsparse(SGSparseVector< ST > *sfm, int32_t num_vec)

SHOGUN Machine Learning Toolbox - Documentation