34 using namespace shogun;
44 float64_t* AA_matrix_, int32_t nr, int32_t nc,
46 alphabet(NULL), degree(degree_), max_mismatch(max_mismatch_), width(width_)
60 :
CStringKernel<char>(size), alphabet(NULL), degree(degree_), max_mismatch(max_mismatch_), width(width_)
85 int32_t lhs_changed=(
lhs!=l);
86 int32_t rhs_changed=(
rhs!=r);
90 SG_DEBUG(
"lhs_changed: %i\n", lhs_changed);
91 SG_DEBUG(
"rhs_changed: %i\n", rhs_changed);
124 for (
unsigned int i=0; i<path.size(); i++)
126 if (path[i]!=joint_seq[index+i])
128 diff +=
AA_matrix[ (path[i]-1)*128 + path[i] - 1] ;
129 diff -= 2*
AA_matrix[ (path[i]-1)*128 + joint_seq[index+i] - 1] ;
130 diff +=
AA_matrix[ (joint_seq[index+i]-1)*128 + joint_seq[index+i] - 1] ;
134 return exp( - diff/
width) ;
228 std::vector<struct joint_list_struct> &joint_list,
229 std::string path,
unsigned int d)
231 const char* AA =
"ACDEFGHIKLMNPQRSTVWY" ;
232 const unsigned int num_AA = strlen(AA) ;
234 assert(path.size()==d) ;
236 for (
unsigned int i=0; i<num_AA; i++)
238 std::vector<struct joint_list_struct> joint_list_ ;
241 fprintf(stderr,
"i=%i: ", i) ;
247 fprintf(stdout,
"*") ;
252 fprintf(stdout,
"+") ;
256 for (
unsigned int j=0; j<joint_list.size(); j++)
258 if (joint_seq[joint_list[j].index+d] != AA[i])
260 if (joint_list[j].mismatch+1 <= (
unsigned int)
max_mismatch)
262 struct joint_list_struct list_item ;
263 list_item = joint_list[j] ;
264 list_item.mismatch = joint_list[j].mismatch+1 ;
265 joint_list_.push_back(list_item) ;
269 joint_list_.push_back(joint_list[j]) ;
272 if (joint_list_.size()>0)
274 std::string path_ = path + AA[i] ;
276 if (d+1 < (
unsigned int)
degree)
286 for (
unsigned int j=0; j<joint_list_.size(); j++)
290 feats[joint_list_[j].ex_index]++ ;
296 if (joint_list_[j].mismatch!=0)
297 feats[joint_list_[j].ex_index] +=
AA_helper(path_, joint_seq, joint_list_[j].index) ;
299 feats[joint_list_[j].ex_index] ++ ;
303 std::vector<int> idx ;
308 for (
unsigned int r=0; r<idx.size(); r++)
309 for (
unsigned int s=r; s<idx.size(); s++)
320 fprintf(stdout,
"\n") ;
326 std::string joint_seq ;
327 std::vector<struct joint_list_struct> joint_list ;
342 for (
int apos=0; apos+
degree-1<alen; apos++)
344 struct joint_list_struct list_item ;
345 list_item.ex_index = i ;
346 list_item.index = apos+joint_seq.size() ;
347 list_item.mismatch = 0 ;
349 joint_list.push_back(list_item) ;
351 joint_seq += std::string(avec, alen) ;
418 if (nr!=128 || nc!=128)
419 SG_ERROR(
"AA_matrix should be of shape 128x128\n");
435 if (
lhs!=NULL &&
rhs!=NULL)
457 void CSpectrumMismatchRBFKernel::init()
float64_t AA_helper(std::string &path, const char *joint_seq, unsigned int index)
bool set_max_mismatch(int32_t max)
void compute_helper_all(const char *joint_seq, std::vector< struct joint_list_struct > &joint_list, std::string path, unsigned int d)
bool resize_array(int32_t dim1, int32_t dim2)
virtual void register_params()
const T & element(int32_t idx1, int32_t idx2) const
EAlphabet get_alphabet() const
virtual int32_t get_num_vectors() const =0
The class Alphabet implements an alphabet and alphabet utility functions.
virtual ~CSpectrumMismatchRBFKernel()
virtual bool init(CFeatures *l, CFeatures *r)
void add(bool *param, const char *name, const char *description="")
bool resize_array(int32_t n)
Class SGObject is the base class of all shogun objects.
bool set_element(const T &p_element, int32_t idx1, int32_t idx2)
bool set_AA_matrix(float64_t *AA_matrix_=NULL, int32_t nr=128, int32_t nc=128)
virtual void remove_lhs()
float64_t compute(int32_t idx_a, int32_t idx_b)
int32_t get_array_size() const
virtual bool init_normalizer()
CFeatures * rhs
feature vectors to occur on right hand side
void add_vector(bool **param, index_t *length, const char *name, const char *description="")
CFeatures * lhs
feature vectors to occur on left hand side
The class Features is the base class of all feature objects.
CSpectrumMismatchRBFKernel()
virtual void remove_lhs()
int32_t kernel_matrix_length
CAlphabet * get_alphabet()
const T & get_element(int32_t idx1, int32_t idx2) const
CArray2< float64_t > kernel_matrix
Template class StringKernel, is the base class of all String Kernels.
#define SG_MALLOC(type, len)