19 using namespace shogun;
45 channels.begin = channels.end = channels.end_array = NULL;
47 words.begin = words.end = words.end_array = NULL;
49 name.begin = name.end = name.end_array = NULL;
63 substring example_string = {line, line + num_chars};
69 tokenize(
'|', example_string, channels);
75 feature_start = &channels[0];
81 if (tab_location != label_space.
end)
82 label_space.
start = tab_location+1;
86 if (words.index() > 0 && words.last().end == label_space.
end)
99 for (
substring* i = feature_start; i != channels.end; i++)
104 if (words.begin == words.end)
113 bool new_index =
false;
116 if (channel.
start[0] !=
' ')
122 if (name.index() > 0)
124 index = (
unsigned char)(*name[0].start);
136 index = (
unsigned char)
' ';
145 for (
substring* j = words.begin+feature_offset; j != words.end; j++)
160 if (new_index && ae->
atomics[index].begin != ae->
atomics[index].end)
174 int32_t num_chars = buf->
read_line(line);
179 substring example_string = {line, line + num_chars};
182 tokenize(
' ', example_string, words);
196 for (
substring* i = feature_start; i != words.end; i++)
216 int32_t num_chars = buf->
read_line(line);
221 substring example_string = {line, line + num_chars};
224 tokenize(
' ', example_string, words);
239 for (
substring* i = feature_start; i != words.end; i++)
257 char* file_name = fname;
258 char default_cache_name[] =
"vw_cache.dat.cache";
261 file_name = default_cache_name;
272 SG_ERROR(
"Protocol buffers cache support is not implemented yet.\n");
275 SG_ERROR(
"Unexpected cache type specified!\n");
283 switch (feat_name.
index())
293 SG_SERROR(
"error NaN value for feature %s! Terminating!\n",
297 SG_SERROR(
"Examples with a weird name, i.e., '%s'\n",
305 char *last = s.
start;
308 if (*s.
start == delim)
uint32_t vw_size_t
vw_size_t typedef to work across platforms
ssize_t read_line(char *&pointer)
void feature_value(substring &s, v_array< substring > &name, float32_t &v)
char * safe_index(char *start, char v, char *max)
const uint32_t hash_base
Seed for hash.
void push_many(const T *new_elem, size_t num)
Class CVwEnvironment is the environment used by VW.
int32_t read_features(CIOBuffer *buf, VwExample *&ex)
CVwEnvironment * env
Environment of VW - used by parser.
Class v_array is a templated class used to store variable length arrays. Memory locations are stored ...
void set_minmax(float64_t label)
CVwCacheWriter * cache_writer
Object which will be used for writing cache.
float32_t float_of_substring(substring s)
int32_t read_dense_features(CIOBuffer *buf, VwExample *&ae)
float64_t sum_feat_sq[256]
Sum of square of features.
struct Substring, specified by start position and end position.
void tokenize(char delim, substring s, v_array< substring > &ret)
void push(const T &new_elem)
bool write_cache
Whether to write cache or not.
float32_t label
Label value.
char * c_string_of_substring(substring s)
v_array< vw_size_t > indices
Array of namespaces.
float32_t weight
Weight of example.
Class SGObject is the base class of all shogun objects.
int32_t read_svmlight_features(CIOBuffer *buf, VwExample *&ae)
void parse_label(v_array< substring > &words)
vw_size_t mask
Mask used for hashing.
EVwCacheType cache_type
Type of cache.
float32_t initial
Initial approximation.
virtual void cache_example(VwExample *&ex)=0
static uint32_t MurmurHashString(substring s, uint32_t h)
VwLabel * ld
Label object.
void init_cache(char *fname, EVwCacheType type=C_NATIVE)
hash_func_t hasher
Hash function to use, of type hash_func_t.
Class CVwNativeCacheWriter writes a cache exactly as that which would be produced by VW's default cac...
v_array< VwFeature > atomics[256]
Array of features.