19 init(file, is_labelled, size);
31 ASSERT(index>=0 && index<current_num_features);
37 for (int32_t i=0; i<current_length; i++)
38 if (current_vector[i].feat_index==index)
39 ret += current_vector[i].entry;
53 int32_t n=current_num_features;
55 current_num_features=num;
62 int32_t dim = get_dim_feature_space();
66 memset(&vec[len], 0, (dim-len) *
sizeof(
float32_t));
74 int32_t dim = get_dim_feature_space();
78 memset(&vec[len], 0, (dim-len) *
sizeof(
float64_t));
94 for (int32_t i=0; i<alen; i++)
98 while ( (j<blen) && (bvec[j].feat_index < a_feat_idx) )
101 if ( (j<blen) && (bvec[j].feat_index == a_feat_idx) )
111 for (int32_t i=0; i<blen; i++)
115 while ( (j<alen) && (avec[j].feat_index < b_feat_idx) )
118 if ( (j<alen) && (avec[j].feat_index == b_feat_idx) )
136 ASSERT(dim>=current_num_features);
139 int32_t num_feat=current_length;
144 for (int32_t i=0; i<num_feat; i++)
145 result+=alpha*vec[sv[i].feat_index]*sv[i].entry;
155 if (vec2_len < current_num_features)
157 SG_ERROR(
"dimension of vec2 (=%d) does not match number of features (=%d)\n",
158 vec2_len, current_num_features);
164 for (int32_t i=0; i<current_length; i++)
165 result+=vec2[current_vector[i].feat_index]*current_vector[i].entry;
175 if (vec2_len < current_num_features)
177 SG_ERROR(
"dimension of vec2 (=%d) does not match number of features (=%d)\n",
178 vec2_len, current_num_features);
184 for (int32_t i=0; i<current_length; i++)
185 result+=vec2[current_vector[i].feat_index]*current_vector[i].entry;
195 if (vec2_len < current_num_features)
197 SG_ERROR(
"dimension of vec (=%d) does not match number of features (=%d)\n",
198 vec2_len, current_num_features);
202 int32_t num_feat=current_length;
208 for (int32_t i=0; i<num_feat; i++)
209 vec2[sv[i].feat_index]+= alpha*
CMath::abs(sv[i].entry);
213 for (int32_t i=0; i<num_feat; i++)
214 vec2[sv[i].feat_index]+= alpha*sv[i].entry;
223 if (vec2_len < current_num_features)
225 SG_ERROR(
"dimension of vec (=%d) does not match number of features (=%d)\n",
226 vec2_len, current_num_features);
230 int32_t num_feat=current_length;
236 for (int32_t i=0; i<num_feat; i++)
237 vec2[sv[i].feat_index]+= alpha*
CMath::abs(sv[i].entry);
241 for (int32_t i=0; i<num_feat; i++)
242 vec2[sv[i].feat_index]+= alpha*sv[i].entry;
250 return current_length;
260 for (int32_t i=0; i<current_length; i++)
261 sq += current_vector[i].entry * current_vector[i].entry;
272 int32_t len=current_length;
274 int32_t* feat_idx=
SG_MALLOC(int32_t, len);
275 int32_t* orig_idx=
SG_MALLOC(int32_t, len);
277 for (int32_t i=0; i<len; i++)
287 for (int32_t i=0; i<len; i++)
288 sf_new[i]=sf_orig[orig_idx[i]];
291 for (int32_t i=0; i<len-1; i++)
292 ASSERT(sf_new[i].feat_index<sf_new[i+1].feat_index);
295 for (int32_t i=0; i<len; i++)
296 sf_orig[i]=sf_new[i];
330 parser.set_read_vector_and_label
334 #define GET_FEATURE_TYPE(f_type, sg_type) \
335 template<> EFeatureType CStreamingSparseFeatures<sg_type>::get_feature_type() \
353 #undef GET_FEATURE_TYPE
357 void CStreamingSparseFeatures<T>::init()
363 current_num_features=-1;
367 void CStreamingSparseFeatures<T>::init(CStreamingFile* file,
372 has_labels = is_labelled;
374 parser.init(file, is_labelled, size);
380 if (!parser.is_running())
381 parser.start_parser();
394 ret_value = (bool) parser.get_next_example(current_vector,
402 for (int32_t i=0; i<current_length; i++)
404 if (current_vector[i].feat_index > current_num_features)
405 current_num_features = current_vector[i].feat_index+1;
415 current_sgvector.
features=current_vector;
416 current_sgvector.num_feat_entries=current_length;
417 current_sgvector.vec_index=current_vec_index;
419 return current_sgvector;
427 return current_label;
433 parser.finalize_example();
439 return current_num_features;
452 return current_num_features;
458 return current_length;
virtual void set_vector_reader()
int64_t get_num_nonzero_entries()
static void qsort_index(T1 *output, T2 *index, uint32_t size)
T get_feature(int32_t index)
virtual int32_t get_size()
#define SG_NOTIMPLEMENTED
virtual void add_to_dense_vec(float64_t alpha, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
virtual void start_parser()
SGSparseVector< T > get_vector()
virtual int32_t get_num_features()
CStreamingSparseFeatures()
static T sparse_dot(T alpha, SGSparseVectorEntry< T > *avec, int32_t alen, SGSparseVectorEntry< T > *bvec, int32_t blen)
EFeatureClass
shogun feature class
virtual void reset_stream()
float32_t compute_squared()
A Streaming File access class.
virtual float32_t dot(CStreamingDotFeatures *df)
virtual int32_t get_nnz_features_for_vector()
virtual int32_t get_dim_feature_space() const
virtual void get_sparse_vector_and_label(SGSparseVectorEntry< bool > *&vector, int32_t &len, float64_t &label)
virtual CFeatures * duplicate() const
Streaming features that support dot products among other operations.
virtual int32_t get_num_vectors() const
#define SG_REALLOC(type, ptr, len)
virtual float64_t get_label()
virtual void expand_if_required(float32_t *&vec, int32_t &len)
void set_read_functions()
virtual void end_parser()
virtual void get_sparse_vector(SGSparseVectorEntry< bool > *&vector, int32_t &len)
template class SGSparseVectorEntry
virtual bool get_next_example()
int32_t set_num_features(int32_t num)
The class Features is the base class of all feature objects.
virtual EFeatureClass get_feature_class()
#define GET_FEATURE_TYPE(f_type, sg_type)
virtual void set_vector_and_label_reader()
T dense_dot(T alpha, T *vec, int32_t dim, T b)
This class implements streaming features with sparse feature vectors. The vector is represented as an...
#define SG_MALLOC(type, len)
virtual void release_example()
~CStreamingSparseFeatures()
static T abs(T a)
return the absolute value of a number
SGSparseVectorEntry< T > * features