SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StreamingStringFeatures.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Shashwat Lal Das
8  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
9  */
10 #ifndef _STREAMING_STRINGFEATURES__H__
11 #define _STREAMING_STRINGFEATURES__H__
12 
13 #include <shogun/lib/common.h>
15 #include <shogun/base/Parameter.h>
16 #include <shogun/lib/DataType.h>
17 #include <shogun/io/InputParser.h>
18 
21 
22 namespace shogun
23 {
// This class implements streaming features as strings.
// It is a class template over the character type T and derives publicly
// from CStreamingFeatures.
//
// NOTE(review): this text is a documentation-site listing, not the raw
// header. The number at the start of each line appears to be the header's
// own line number, and several declarations (the start of the declaration
// ending at "int32_t size);", and most data members) are not visible here.
27 template <class T> class CStreamingStringFeatures : public CStreamingFeatures
28 {
29 public:
30 
39 
// NOTE(review): tail of a declaration whose beginning is not visible in this
// listing. Its trailing parameters match init(file, is_labelled, size) below,
// so it is presumably a constructor -- confirm against the real header.
49  bool is_labelled,
50  int32_t size);
51 
// Virtual destructor.
57  virtual ~CStreamingStringFeatures();
58 
68  virtual void set_vector_reader();
69 
79  virtual void set_vector_and_label_reader();
80 
// Overload taking an EAlphabet enumerator.
87  void use_alphabet(EAlphabet alpha);
88 
// Overload taking a pointer to a CAlphabet object.
95  void use_alphabet(CAlphabet* alpha);
96 
// Overload taking two CAlphabet pointers.
// NOTE(review): presumably ascii_alphabet is the source and binary_alphabet
// the target of the remapping -- confirm against the implementation.
104  void set_remap(CAlphabet* ascii_alphabet, CAlphabet* binary_alphabet);
105 
// Overload taking EAlphabet enumerators; defaults are DNA (letters A,C,G,T)
// and RAWDNA (letters 0,1,2,3).
113  void set_remap(EAlphabet ascii_alphabet=DNA, EAlphabet binary_alphabet=RAWDNA);
114 
120 
128 
134  virtual void start_parser();
135 
141  virtual void end_parser();
142 
151  virtual bool get_next_example();
152 
159 
// Returns a float64_t label.
// NOTE(review): presumably the label of the current example -- confirm.
167  virtual float64_t get_label();
168 
175  virtual void release_example();
176 
182  virtual int32_t get_vector_length();
183 
189  virtual EFeatureType get_feature_type();
190 
197 
203  virtual CFeatures* duplicate() const;
204 
// Returns the literal name string "StreamingStringFeatures".
210  inline virtual const char* get_name() const { return "StreamingStringFeatures"; }
211 
217  virtual int32_t get_num_vectors() const;
218 
224  virtual int32_t get_size();
225 
231  virtual int32_t get_num_features();
232 
233 private:
234 
// Argument-less private initialisation helper (body not visible here).
239  void init();
240 
// Private initialisation helper taking the streaming file, the labelled flag
// and a size (body not visible here).
248  void init(CStreamingFile *file, bool is_labelled, int32_t size);
249 
250 protected:
251 
// NOTE(review): the page's member index describes further members (parser,
// working_file, alphabet, alpha_ascii, alpha_bin, current_string,
// current_sgstring, current_label, has_labels, remap_to_bin), but their
// declarations are not visible in this listing; only the two below are.
254 
257 
260 
263 
266 
269 
272 
// The length of the current string.
274  int32_t current_length;
275 
278 
281 
284 
// Number of symbols.
286  int32_t num_symbols;
287 };
288 
289 }
290 #endif // _STREAMING_STRINGFEATURES__H__
CAlphabet * alphabet
Alphabet to use.
float64_t current_label
The label of the current example, if applicable.
virtual CFeatures * duplicate() const
DNA - letters A,C,G,T.
Definition: Alphabet.h:23
int32_t num_symbols
Number of symbols.
virtual const char * get_name() const
RAWDNA - letters 0,1,2,3.
Definition: Alphabet.h:26
EAlphabet
Alphabet of charfeatures/observations.
Definition: Alphabet.h:20
The class Alphabet implements an alphabet and alphabet utility functions.
Definition: Alphabet.h:88
CStreamingFile * working_file
The StreamingFile object to read from.
CInputParser< T > parser
The parser object, which reads from input and returns parsed example objects.
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:35
A Streaming File access class.
Definition: StreamingFile.h:38
shogun string
bool remap_to_bin
Whether remapping must be done.
This class implements streaming features as strings.
double float64_t
Definition: common.h:56
long double floatmax_t
Definition: common.h:57
T * current_string
The current example's string as a T*.
SGString< T > current_sgstring
The current example's string as an SGString<T>.
EFeatureType
shogun feature type
Definition: FeatureTypes.h:16
Class CInputParser is a templated class used to maintain the reading/parsing/providing of examples...
Definition: InputParser.h:80
virtual EFeatureType get_feature_type()
The class Features is the base class of all feature objects.
Definition: Features.h:56
Streaming features are features which are used for online algorithms.
CAlphabet * alpha_bin
If remapping is enabled, this is the target alphabet.
bool has_labels
Whether examples are labelled or not.
void set_remap(CAlphabet *ascii_alphabet, CAlphabet *binary_alphabet)
CAlphabet * alpha_ascii
If remapping is enabled, this is the source alphabet.
int32_t current_length
The length of the current string.

SHOGUN Machine Learning Toolbox - Documentation