20 using namespace shogun;
22 #ifndef DOXYGEN_SHOULD_SKIP_THIS
23 struct HASHEDWD_THREAD_PARAM
36 #endif // DOXYGEN_SHOULD_SKIP_THIS
42 "CHashedWDFeaturesTransposed::CHashedWDFeaturesTransposed()",
64 int32_t start_order, int32_t order, int32_t from_order,
76 int32_t transposed_num_feat=0;
77 int32_t transposed_num_vec=0;
100 :
CDotFeatures(orig), strings(orig.strings), transposed_strings(orig.transposed_strings),
101 degree(orig.degree), start_degree(orig.start_degree),
102 from_degree(orig.from_degree), m_hash_bits(orig.m_hash_bits),
103 normalization_const(orig.normalization_const)
133 bool free_vec1, free_vec2;
136 uint8_t* vec2=wdf->strings->get_feature_vector(vec_idx2, len2, free_vec2);
142 for (int32_t i=0; i<len1; i++)
144 for (int32_t j=0; (i+j<len1) && (j<
degree); j++)
146 if (vec1[i+j]!=vec2[i+j])
153 wdf->strings->free_feature_vector(vec2, vec_idx2, free_vec2);
159 if (vec2_len !=
w_dim)
160 SG_ERROR(
"Dimensions don't match, vec2_dim=%d, w_dim=%d\n", vec2_len,
w_dim);
172 for (int32_t i=0; i < len; i++)
175 for (int32_t k=0; k<
degree && i+k<len; k++)
180 #ifdef DEBUG_HASHEDWD
181 SG_PRINT(
"vec[i]=%d, k=%d, offs=%d o=%d h=%d \n", vec[i], k,offs, o, h);
183 sum+=vec2[o+(h &
mask)]*wd;
203 uint32_t* index=
SG_MALLOC(uint32_t, stop);
205 int32_t num_vectors=stop-start;
214 SG_ERROR(
"Dimensions don't match, vec_len=%d, w_dim=%d\n", dim,
w_dim);
220 HASHEDWD_THREAD_PARAM params;
222 params.sub_index=NULL;
223 params.output=output;
226 params.alphas=alphas;
229 params.progress=
false;
236 pthread_t* threads =
SG_MALLOC(pthread_t, num_threads-1);
237 HASHEDWD_THREAD_PARAM* params =
SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads);
238 int32_t step= num_vectors/num_threads;
242 for (t=0; t<num_threads-1; t++)
245 params[t].sub_index=NULL;
246 params[t].output = output;
247 params[t].start = start+t*step;
248 params[t].stop = start+(t+1)*step;
249 params[t].alphas=alphas;
252 params[t].progress =
false;
253 params[t].index=index;
254 pthread_create(&threads[t], NULL,
259 params[t].sub_index=NULL;
260 params[t].output = output;
261 params[t].start = start+t*step;
262 params[t].stop = stop;
263 params[t].alphas=alphas;
266 params[t].progress =
false;
267 params[t].index=index;
270 for (t=0; t<num_threads-1; t++)
271 pthread_join(threads[t], NULL);
281 SG_INFO(
"prematurely stopped. \n");
290 uint32_t* index=
SG_MALLOC(uint32_t, num);
298 SG_ERROR(
"Dimensions don't match, vec_len=%d, w_dim=%d\n", dim,
w_dim);
304 HASHEDWD_THREAD_PARAM params;
306 params.sub_index=sub_index;
307 params.output=output;
310 params.alphas=alphas;
313 params.progress=
false;
320 pthread_t* threads =
SG_MALLOC(pthread_t, num_threads-1);
321 HASHEDWD_THREAD_PARAM* params =
SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads);
322 int32_t step= num/num_threads;
326 for (t=0; t<num_threads-1; t++)
329 params[t].sub_index=sub_index;
330 params[t].output = output;
331 params[t].start = t*step;
332 params[t].stop = (t+1)*step;
333 params[t].alphas=alphas;
336 params[t].progress =
false;
337 params[t].index=index;
338 pthread_create(&threads[t], NULL,
343 params[t].sub_index=sub_index;
344 params[t].output = output;
345 params[t].start = t*step;
346 params[t].stop = num;
347 params[t].alphas=alphas;
350 params[t].progress =
false;
351 params[t].index=index;
354 for (t=0; t<num_threads-1; t++)
355 pthread_join(threads[t], NULL);
365 SG_INFO(
"prematurely stopped. \n");
371 HASHEDWD_THREAD_PARAM* par=(HASHEDWD_THREAD_PARAM*) p;
373 int32_t* sub_index=par->sub_index;
375 int32_t start=par->start;
376 int32_t stop=par->stop;
380 bool progress=par->progress;
381 uint32_t* index=par->index;
392 for (int32_t j=start; j<stop; j++)
402 uint8_t* dim=transposed_strings[i+k].
string;
405 for (int32_t j=start; j<stop; j++)
407 uint8_t bval=dim[sub_index[j]];
413 output[j]+=vec[o + (h &
mask)]*wd;
417 offs+=partial_w_dim*
degree;
423 for (int32_t j=start; j<stop; j++)
426 output[j]=output[j]*alphas[sub_index[j]]/normalization_const+bias;
428 output[j]=output[j]/normalization_const+bias;
442 uint8_t* dim=transposed_strings[i+k].
string;
445 for (int32_t j=start; j<stop; j++)
452 output[j]+=vec[o + (h &
mask)]*wd;
456 offs+=partial_w_dim*
degree;
462 for (int32_t j=start; j<stop; j++)
465 output[j]=output[j]*alphas[j]/normalization_const+bias;
467 output[j]=output[j]/normalization_const+bias;
476 if (vec2_len !=
w_dim)
477 SG_ERROR(
"Dimensions don't match, vec2_dim=%d, w_dim=%d\n", vec2_len,
w_dim);
491 for (int32_t i=0; i<len; i++)
494 for (int32_t k=0; k<
degree && i+k<len; k++)
501 #ifdef DEBUG_HASHEDWD
502 SG_PRINT(
"offs=%d o=%d h=%d \n", offs, o, h);
503 SG_PRINT(
"vec[i]=%d, k=%d, offs=%d o=%d h=%d \n", vec[i], k,offs, o, h);
505 vec2[o+(h &
mask)]+=wd;
525 for (int32_t i=0; i<
degree; i++)
528 SG_DEBUG(
"created HashedWDFeaturesTransposed with d=%d (%d), alphabetsize=%d, "
529 "dim=%d partial_dim=%d num=%d, len=%d\n",
540 for (int32_t i=0; i<
degree; i++)
CHashedWDFeaturesTransposed()
virtual void free_feature_iterator(void *iterator)
void set_normalization_const(float64_t n=0)
virtual EFeatureType get_feature_type()=0
int32_t get_num_threads() const
float64_t normalization_const
virtual EFeatureType get_feature_type()
virtual int32_t get_num_vectors() const
static uint32_t IncrementalMurmurHash2(uint8_t data, uint32_t h)
#define SG_NOTIMPLEMENTED
The class Alphabet implements an alphabet and alphabet utility functions.
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
virtual int32_t get_num_vectors() const
Features that support dot products among other operations.
virtual void * get_feature_iterator(int32_t vector_index)
virtual EFeatureClass get_feature_class()=0
static void clear_cancel()
int32_t get_num_symbols() const
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
SGVector< ST > get_feature_vector(int32_t num)
static bool cancel_computations()
CStringFeatures< uint8_t > * strings
virtual bool get_next_feature(int32_t &index, float64_t &value, void *iterator)
SGString< uint8_t > * transposed_strings
The class Features is the base class of all feature objects.
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)
static void fill_vector(T *vec, int32_t len, T value)
virtual EFeatureClass get_feature_class()
virtual int32_t get_max_vector_length()
static void * dense_dot_range_helper(void *p)
void progress(float64_t current_val, float64_t min_val=0.0, float64_t max_val=1.0, int32_t decimals=1, const char *prefix="PROGRESS:\t")
Features that compute the Weighted Degree Kernel feature space explicitly.
CAlphabet * get_alphabet()
virtual void dense_dot_range_subset(int32_t *sub_index, int32_t num, float64_t *output, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b)
virtual ~CHashedWDFeaturesTransposed()
static float32_t sqrt(float32_t x)
x^0.5
#define SG_UNSTABLE(func,...)
bool have_same_length(int32_t len=-1)
#define SG_MALLOC(type, len)
virtual CFeatures * duplicate() const
virtual void dense_dot_range(float64_t *output, int32_t start, int32_t stop, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b)
static T abs(T a)
return the absolute value of a number
CStringFeatures< ST > * get_transposed()