21 init(file, is_labelled, size);
40 num_symbols=alphabet->get_num_symbols();
50 num_symbols=alphabet->get_num_symbols();
57 alpha_ascii=
new CAlphabet(ascii_alphabet);
65 alpha_ascii=
new CAlphabet(ascii_alphabet);
105 return current_length;
115 parser.set_read_vector_and_label
119 #define GET_FEATURE_TYPE(f_type, sg_type) \
120 template<> EFeatureType CStreamingStringFeatures<sg_type>::get_feature_type() \
138 #undef GET_FEATURE_TYPE
142 void CStreamingStringFeatures<T>::init()
145 alphabet=
new CAlphabet();
149 current_sgstring.string=current_string;
150 current_sgstring.slen=current_length;
154 void CStreamingStringFeatures<T>::init(CStreamingFile* file,
159 has_labels=is_labelled;
161 parser.init(file, is_labelled, size);
162 parser.set_free_vector_after_release(
false);
163 parser.set_free_vectors_on_destruct(
false);
170 alpha_ascii=alphabet;
172 if (!parser.is_running())
173 parser.start_parser();
187 ret_value = (bool) parser.get_next_example(current_string,
197 alpha_ascii->add_string_to_histogram(current_string, current_length);
199 for (i=0; i<current_length; i++)
200 current_string[i]=alpha_ascii->remap_to_bin(current_string[i]);
201 alpha_bin->add_string_to_histogram(current_string, current_length);
205 alpha_ascii->add_string_to_histogram(current_string, current_length);
209 if ( !(alpha_ascii->check_alphabet_size() && alpha_ascii->check_alphabet()) )
211 SG_ERROR(
"StreamingStringFeatures: The given input was found to be incompatible with the alphabet!\n");
220 alphabet=alpha_ascii;
223 num_symbols=alphabet->get_num_symbols();
231 current_sgstring.string=current_string;
232 current_sgstring.slen=current_length;
234 return current_sgstring;
242 return current_label;
248 parser.finalize_example();
254 return current_length;
virtual CFeatures * duplicate() const
void use_alphabet(EAlphabet alpha)
virtual void get_string(bool *&vector, int32_t &len)
CStreamingStringFeatures()
virtual float64_t get_label()
EAlphabet
Alphabet of charfeatures/observations.
SGString< T > get_vector()
virtual int32_t get_num_features()
The class Alphabet implements an alphabet and alphabet utility functions.
virtual int32_t get_num_vectors() const
EFeatureClass
shogun feature class
A Streaming File access class.
virtual int32_t get_vector_length()
virtual void get_string_and_label(bool *&vector, int32_t &len, float64_t &label)
virtual void end_parser()
bool remap_to_bin
Whether remapping must be done.
floatmax_t get_num_symbols()
This class implements streaming features as strings.
virtual bool get_next_example()
virtual void start_parser()
virtual void set_vector_reader()
void set_read_functions()
#define GET_FEATURE_TYPE(f_type, sg_type)
virtual int32_t get_size()
The class Features is the base class of all feature objects.
virtual ~CStreamingStringFeatures()
virtual EFeatureClass get_feature_class()
Streaming features are features which are used for online algorithms.
virtual void set_vector_and_label_reader()
virtual void release_example()
void set_remap(CAlphabet *ascii_alphabet, CAlphabet *binary_alphabet)
CAlphabet * get_alphabet()