28 using namespace shogun;
30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
46 #endif // DOXYGEN_SHOULD_SKIP_THIS
119 SG_DEBUG(
"deleting CWeightedDegreeStringKernel optimization\n");
143 int32_t lhs_changed=(
lhs!=l);
144 int32_t rhs_changed=(
rhs!=r);
148 SG_DEBUG(
"lhs_changed: %i\n", lhs_changed);
149 SG_DEBUG(
"rhs_changed: %i\n", rhs_changed);
156 SG_ERROR(
"All strings in WD kernel must have same length (lhs wrong)!\n");
159 SG_ERROR(
"All strings in WD kernel must have same length (rhs wrong)!\n");
185 SG_DEBUG(
"deleting CWeightedDegreeStringKernel optimization\n");
210 SG_DEBUG(
"deleting CWeightedDegreeStringKernel optimization\n");
215 SG_DEBUG(
"initializing CWeightedDegreeStringKernel optimization\n") ;
217 for (int32_t i=0; i<count; i++)
221 if ( (i % (count/10+1)) == 0)
264 char* avec, int32_t alen,
char* bvec, int32_t blen)
268 for (int32_t i=0; i<alen; i++)
271 int32_t mismatches=0;
273 for (int32_t j=0; (i+j<alen) && (j<
degree); j++)
275 if (avec[i+j]!=bvec[i+j])
292 char* avec, int32_t alen,
char* bvec, int32_t blen)
297 int32_t match_len=-1;
299 for (int32_t i=0; i<alen; i++)
301 if (avec[i]==bvec[i])
318 char* avec, int32_t alen,
char* bvec, int32_t blen)
322 for (int32_t i=0; i<alen; i++)
326 for (int32_t j=0; (i+j<alen) && (j<
degree); j++)
328 if (avec[i+j]!=bvec[i+j])
341 char* avec, int32_t alen,
char* bvec, int32_t blen)
345 for (int32_t i=0; i<alen; i++)
348 for (int32_t j=0; (i+j<alen) && (j<
degree); j++)
350 if (avec[i+j]!=bvec[i+j])
367 bool free_avec, free_bvec;
402 for (int32_t i=0; i<len; i++)
408 for (int32_t i=0; i<len; i++)
421 for (int32_t i=0; i<len; i++)
437 int32_t idx,
float64_t alpha, int32_t tree_num)
448 for (int32_t i=tree_num; i<tree_num+
degree && i<len; i++)
473 for (int32_t i=0; i<len; i++)
477 for (int32_t i=0; i<len; i++)
488 int32_t idx,
float64_t alpha, int32_t tree_num)
499 for (int32_t i=tree_num; i<len && i<tree_num+
degree; i++)
523 ASSERT(char_vec && len>0);
526 for (int32_t i=0; i<len; i++)
532 for (int32_t i=0; i<len; i++)
551 for (int32_t i=0; i<len; i++)
556 for (int32_t i=0; i<len; i++)
632 SG_ERROR(
"WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len,
degree);
649 for (int32_t i=0; i<
degree*len; i++)
677 for (int32_t i=0; i<len; i++)
752 for (int32_t i=1; i<
degree+1 ; i++)
755 for (int32_t i=degree+1; i<
seq_length+1 ; i++)
766 for (int32_t i=1; i<
degree+1 ; i++)
769 for (int32_t i=degree+1; i<
seq_length+1 ; i++)
779 for (int32_t i=1; i<
degree+1 ; i++)
782 for (int32_t i=degree+1; i<
seq_length+1 ; i++)
793 for (int32_t i=1; i<
degree+1 ; i++)
796 for (int32_t i=degree+1; i<
seq_length+1 ; i++)
829 S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
834 int32_t
length=params->length;
835 int32_t* vec=params->vec;
838 int32_t* vec_idx=params->vec_idx;
843 for (int32_t i=params->start; i<params->end; i++)
864 int32_t num_vec, int32_t* vec_idx,
float64_t* result, int32_t num_suppvec,
871 ASSERT(num_vec<=rhs->get_num_vectors());
881 int32_t* vec=
SG_MALLOC(int32_t, num_threads*num_feat);
886 for (int32_t j=0; j<num_feat; j++)
893 S_THREAD_PARAM params;
895 params.result=result;
899 params.factor=factor;
904 params.vec_idx=vec_idx;
917 pthread_t* threads =
SG_MALLOC(pthread_t, num_threads-1);
918 S_THREAD_PARAM* params =
SG_MALLOC(S_THREAD_PARAM, num_threads);
919 int32_t step= num_vec/num_threads;
922 for (t=0; t<num_threads-1; t++)
924 params[t].vec=&vec[num_feat*t];
925 params[t].result=result;
927 params[t].kernel=
this;
928 params[t].tries=
tries;
929 params[t].factor=factor;
931 params[t].start = t*step;
932 params[t].end = (t+1)*step;
934 params[t].vec_idx=vec_idx;
937 params[t].vec=&vec[num_feat*t];
938 params[t].result=result;
940 params[t].kernel=
this;
941 params[t].tries=
tries;
942 params[t].factor=factor;
944 params[t].start=t*step;
945 params[t].end=num_vec;
947 params[t].vec_idx=vec_idx;
950 for (t=0; t<num_threads-1; t++)
951 pthread_join(threads[t], NULL);
974 if (
lhs!=NULL &&
rhs!=NULL)
980 void CWeightedDegreeStringKernel::init()
1014 "weights",
"WD Kernel weights.");
1017 "Weights per position.");
1021 "Number of allowed mismatches.");
1023 "If block computation shall be used.");
1025 "WeightedDegree kernel type.");
1027 "Unqueal -1 if just a single degree is selected.");
1029 "Alphabet of Features.");
float64_t * block_weights
float64_t compute_with_mismatch(char *avec, int32_t alen, char *bvec, int32_t blen)
void add_example_to_tree_mismatch_recursion(int32_t tree, int32_t i, float64_t alpha, int32_t *vec, int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec, int32_t max_mismatch, float64_t *weights)
void create_empty_tries()
float64_t * compute_abs_weights(int32_t &len)
void add_example_to_single_tree(int32_t idx, float64_t weight, int32_t tree_num)
bool init_block_weights_exp()
int32_t get_num_threads() const
bool init_block_weights_log()
float64_t compute_using_block(char *avec, int32_t alen, char *bvec, int32_t blen)
float64_t compute_by_tree_helper(int32_t *vec, int32_t len, int32_t seq_pos, int32_t tree_pos, int32_t weight_pos, float64_t *weights, bool degree_times_position_weights)
bool set_max_mismatch(int32_t max)
int32_t position_weights_len
friend class CFirstElementKernelNormalizer
EAlphabet get_alphabet() const
virtual ~CWeightedDegreeStringKernel()
virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs)=0
void add_example_to_single_tree_mismatch(int32_t idx, float64_t weight, int32_t tree_num)
void set_is_initialized(bool p_init)
The class Alphabet implements an alphabet and alphabet utility functions.
virtual void remove_lhs()
void add_to_trie(int32_t i, int32_t seq_offset, int32_t *vec, float32_t alpha, float64_t *weights, bool degree_times_position_weights)
bool set_position_weights(float64_t *pws, int32_t len)
virtual bool set_normalizer(CKernelNormalizer *normalizer_)
uint8_t remap_to_bin(uint8_t c)
void create(int32_t len, bool p_use_compact_terminal_nodes=true)
virtual void compute_batch(int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0)
float64_t * weights_buffer
bool get_is_initialized()
static void * compute_batch_helper(void *p)
void add(bool *param, const char *name, const char *description="")
CWeightedDegreeStringKernel()
Class SGObject is the base class of all shogun objects.
bool set_wd_weights_by_type(EWDKernType type)
virtual bool init_optimization(int32_t count, int32_t *IDX, float64_t *alphas)
float64_t * position_weights
static void clear_cancel()
float64_t compute_without_mismatch(char *avec, int32_t alen, char *bvec, int32_t blen)
static int32_t pow(int32_t x, int32_t n)
bool init_block_weights_from_wd()
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
void compute_by_tree(int32_t idx, float64_t *LevelContrib)
The Weighted Degree String kernel.
float64_t compute_without_mismatch_matrix(char *avec, int32_t alen, char *bvec, int32_t blen)
virtual bool init(CFeatures *l, CFeatures *r)
SGVector< ST > get_feature_vector(int32_t num)
static T max(T a, T b)
Returns the maximum of two values of type T.
virtual bool delete_optimization()
void delete_trees(bool p_use_compact_terminal_nodes=true)
static bool cancel_computations()
bool init_block_weights_const()
virtual bool init_normalizer()
CFeatures * rhs
Feature vectors that occur on the right-hand side.
static int64_t nchoosek(int32_t n, int32_t k)
void add_vector(bool **param, index_t *length, const char *name, const char *description="")
CFeatures * lhs
Feature vectors that occur on the left-hand side.
The class Features is the base class of all feature objects.
static T min(T a, T b)
Returns the minimum of two values of type T.
void add_example_to_tree(int32_t idx, float64_t weight)
static float64_t log(float64_t v)
virtual int32_t get_max_vector_length()
virtual void remove_lhs()
bool init_block_weights_sqpoly()
float64_t compute(int32_t idx_a, int32_t idx_b)
bool init_block_weights_from_wd_external()
CKernelNormalizer * normalizer
CAlphabet * get_alphabet()
float64_t * compute_abs_weights(int32_t &len)
bool init_block_weights_linear()
bool init_block_weights()
virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs)=0
void set_position_weights(float64_t *p_position_weights)
void add_matrix(bool **param, index_t *length_y, index_t *length_x, const char *name, const char *description="")
Template class StringKernel is the base class of all string kernels.
void add_example_to_tree_mismatch(int32_t idx, float64_t weight)
int32_t get_degree() const
bool set_weights(SGMatrix< float64_t > new_weights)
bool have_same_length(int32_t len=-1)
#define SG_MALLOC(type, len)
bool init_block_weights_cubicpoly()