SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DynProg.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Gunnar Raetsch
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Written (W) 2008-2009 Jonas Behr
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
13 #ifndef __CDYNPROG_H__
14 #define __CDYNPROG_H__
15 
17 #include <shogun/lib/common.h>
18 #include <shogun/base/SGObject.h>
19 #include <shogun/io/SGIO.h>
20 #include <shogun/lib/config.h>
23 #include <shogun/structure/Plif.h>
30 #include <shogun/lib/Array.h>
31 #include <shogun/lib/Array2.h>
32 #include <shogun/lib/Array3.h>
33 #include <shogun/lib/Time.h>
34 
35 #include <stdio.h>
36 #include <limits.h>
37 
38 namespace shogun
39 {
40  template <class T> class CSparseFeatures;
41  class CIntronList;
42  class CPlifMatrix;
43  class CSegmentLoss;
44  template <class T> class CArray;
45 
46 //#define DYNPROG_TIMING
47 
/* Integer type used for state indices: 16 bit wide when USE_BIGSTATES is
 * defined at compile time, 8 bit wide otherwise. */
48 #ifdef USE_BIGSTATES
49 typedef uint16_t T_STATES ;
50 #else
51 typedef uint8_t T_STATES ;
52 #endif
/* Pointer to a state index. */
53 typedef T_STATES* P_STATES ;
54 
55 #ifndef DOXYGEN_SHOULD_SKIP_THIS
56 
/* Plain record of segment-loss bookkeeping fields. Nothing in this header
 * reads or writes it, so the field notes below are inferred from the names
 * only -- NOTE(review): confirm against DynProg.cpp before relying on them. */
57 struct segment_loss_struct
58 {
/* presumably the maximal look-back distance -- TODO confirm */
60  int32_t maxlookback;
/* presumably the sequence length -- TODO confirm */
62  int32_t seqlen;
/* raw pointer; ownership and element count are not visible here */
64  int32_t *segments_changed;
/* raw pointer; ownership and element count are not visible here */
66  float64_t *num_segment_id;
/* raw pointer; ownership and element count are not visible here */
68  int32_t *length_segment_id ;
69 };
70 #endif
71 
77 class CDynProg : public CSGObject
78 {
79 public:
84  CDynProg(int32_t p_num_svms=8);
85  virtual ~CDynProg();
86 
87  // model related functions
93  void set_num_states(int32_t N);
94 
96  int32_t get_num_states();
97 
99  int32_t get_num_svms();
100 
106  void init_content_svm_value_array(const int32_t p_num_svms);
107 
115  void init_tiling_data(int32_t* probe_pos, float64_t* intensities, const int32_t num_probes);
116 
123  void precompute_tiling_plifs(CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs);
124 
129  void resize_lin_feat(int32_t num_new_feat);
135 
141 
146  void set_a(SGMatrix<float64_t> a);
147 
152  void set_a_id(SGMatrix<int32_t> a);
153 
159 
164  void init_mod_words_array(SGMatrix<int32_t> p_mod_words_array);
165 
171  bool check_svm_arrays();
172 
178 
185  int32_t get_num_positions();
186 
197 
202  void set_pos(SGVector<int32_t> pos);
203 
209  void set_orf_info(SGMatrix<int32_t> orf_info);
210 
215  void set_gene_string(SGVector<char> genestr);
216 
217 
222  void set_dict_weights(SGMatrix<float64_t> dictionary_weights);
223 
229 
236  void best_path_set_segment_ids_mask(int32_t* segment_ids, float64_t* segment_mask, int32_t m);
237 
240 
245  void set_plif_matrices(CPlifMatrix* pm);
246 
247  // best_path result retrieval functions
253 
259 
265 
266 
275  void compute_nbest_paths(int32_t max_num_signals,
276  bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences);
277 
279 
/**
 * Declaration only; the body lives in DynProg.cpp.
 *
 * @param my_state_seq    raw int32_t buffer (presumably the state sequence -- TODO confirm)
 * @param my_pos_seq      raw int32_t buffer (presumably the position sequence -- TODO confirm)
 * @param my_seq_len      int32_t length argument (presumably the length of both sequences -- TODO confirm)
 * @param seq_array       read-only float64_t buffer; its layout is not visible from this header
 * @param max_num_signals int32_t argument; its meaning is not visible from this header
 */
291  void best_path_trans_deriv(
292  int32_t* my_state_seq, int32_t *my_pos_seq,
293  int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals);
294 
295  // additional best_path_trans_deriv functions
300  void set_my_state_seq(int32_t* my_state_seq);
301 
306  void set_my_pos_seq(int32_t* my_pos_seq);
307 
315  void get_path_scores(float64_t** my_scores, int32_t* seq_len);
316 
324  void get_path_losses(float64_t** my_losses, int32_t* seq_len);
325 
326 
/**
 * access function for number of states N
 *
 * m_N is declared as int32_t, so the value is implicitly converted to
 * T_STATES on return; a count outside the T_STATES range does not survive
 * that conversion.
 *
 * @return m_N converted to T_STATES
 */
328  inline T_STATES get_N() const
329  {
330  return m_N ;
331  }
332 
/**
 * Store one entry of m_end_state_distribution_q (distribution of end-states).
 *
 * @param offset index of the entry to overwrite; not range-checked in this function
 * @param value  value written to that entry
 */
337  inline void set_q(T_STATES offset, float64_t value)
338  {
339  m_end_state_distribution_q[offset]=value;
340  }
341 
/**
 * Store one entry of m_initial_state_distribution_p (initial distribution of states).
 *
 * @param offset index of the entry to overwrite; not range-checked in this function
 * @param value  value written to that entry
 */
346  inline void set_p(T_STATES offset, float64_t value)
347  {
348  m_initial_state_distribution_p[offset]=value;
349  }
350 
/**
 * Store a single entry of m_transition_matrix_a.
 *
 * @param line_  first index passed to element()
 * @param column second index passed to element()
 * @param value  value written to that entry
 */
357  inline void set_a(T_STATES line_, T_STATES column, float64_t value)
358  {
359  m_transition_matrix_a.element(line_,column)=value; // look also best_path!
360  }
361 
/**
 * Read one entry of m_end_state_distribution_q (distribution of end-states).
 *
 * @param offset index of the entry to read; not range-checked in this function
 * @return the value stored at that index
 */
367  inline float64_t get_q(T_STATES offset) const
368  {
369  return m_end_state_distribution_q[offset];
370  }
371 
/**
 * Read one entry of m_end_state_distribution_q_deriv.
 *
 * @param offset index of the entry to read; not range-checked in this function
 * @return the value stored at that index
 */
377  inline float64_t get_q_deriv(T_STATES offset) const
378  {
379  return m_end_state_distribution_q_deriv[offset];
380  }
381 
/**
 * Read one entry of m_initial_state_distribution_p (initial distribution of states).
 *
 * @param offset index of the entry to read; not range-checked in this function
 * @return the value stored at that index
 */
387  inline float64_t get_p(T_STATES offset) const
388  {
389  return m_initial_state_distribution_p[offset];
390  }
391 
/**
 * Read one entry of m_initial_state_distribution_p_deriv; the counterpart of
 * get_q_deriv() for the initial state distribution.
 *
 * @param offset index of the entry to read; not range-checked in this function
 * @return the value stored at that index
 */
397  inline float64_t get_p_deriv(T_STATES offset) const
398  {
399  return m_initial_state_distribution_p_deriv[offset];
400  }
401 
406 
/**
 * Hand out the m_lin_feat array together with its two dimensions.
 *
 * @param dim1 output: first dimension as reported by m_lin_feat.get_array_size()
 * @param dim2 output: second dimension as reported by m_lin_feat.get_array_size()
 * @return the pointer returned by m_lin_feat.get_array()
 *         (presumably the array's own storage rather than a copy -- confirm in Array2.h)
 */
413  inline float64_t* get_lin_feat(int32_t & dim1, int32_t & dim2)
414  {
415  m_lin_feat.get_array_size(dim1, dim2);
416  return m_lin_feat.get_array();
417  }
/**
 * Load m_lin_feat from a caller-supplied buffer.
 *
 * @param p_lin_feat buffer handed to m_lin_feat.set_array()
 * @param p_num_svms passed as the first dimension
 * @param p_seq_len  passed as the second dimension
 */
426  inline void set_lin_feat(float64_t* p_lin_feat, int32_t p_num_svms, int32_t p_seq_len)
427  {
/* The two trailing arguments are p_free_array=true and copy_array=true;
 * presumably m_lin_feat then works on its own copy -- confirm in Array2.h. */
428  m_lin_feat.set_array(p_lin_feat, p_num_svms, p_seq_len, true, true);
429  }
434  void create_word_string();
435 
438  void precompute_stop_codons();
439 
/**
 * Read a single entry of m_transition_matrix_a.
 *
 * @param line_  first index passed to element()
 * @param column second index passed to element()
 * @return the value stored at (line_, column)
 */
446  inline float64_t get_a(T_STATES line_, T_STATES column) const
447  {
448  return m_transition_matrix_a.element(line_, column); // look also best_path()!
449  }
450 
/**
 * Read a single entry of m_transition_matrix_a_deriv.
 *
 * @param line_  first index passed to element()
 * @param column second index passed to element()
 * @return the value stored at (line_, column)
 */
457  inline float64_t get_a_deriv(T_STATES line_, T_STATES column) const
458  {
459  return m_transition_matrix_a_deriv.element(line_, column); // look also best_path()!
460  }
462 
467  void set_intron_list(CIntronList* intron_list, int32_t num_plifs);
468 
/**
 * Accessor for the segment loss object.
 *
 * @return the pointer held in m_seg_loss_obj, returned as-is; no reference
 *         counting call is made here
 */
470  CSegmentLoss* get_segment_loss_object()
471  {
472  return m_seg_loss_obj;
473  }
474 
/**
 * Configure the long-transition handling.
 *
 * @param use_long_transitions stored in m_long_transitions
 * @param threshold            stored in m_long_transition_threshold
 * @param max_len              accepted but not stored; only a debug message is emitted
 */
481  void long_transition_settings(bool use_long_transitions, int32_t threshold, int32_t max_len)
482  {
483  m_long_transitions = use_long_transitions;
484  m_long_transition_threshold = threshold;
/* max_len is deliberately dropped: the assignment below is commented out
 * and the matching member declaration is commented out as well. */
485  SG_DEBUG("ignoring max_len\n") ;
486  //m_long_transition_max = max_len;
487  }
488 
489 protected:
490 
491  /* helper functions */
492 
502  void lookup_content_svm_values(const int32_t from_state,
503  const int32_t to_state, const int32_t from_pos, const int32_t to_pos,
504  float64_t* svm_values, int32_t frame);
505 
513  inline void lookup_tiling_plif_values(const int32_t from_state,
514  const int32_t to_state, const int32_t len, float64_t* svm_values);
515 
520  inline int32_t find_frame(const int32_t from_state);
521 
530  inline int32_t raw_intensities_interval_query(
531  const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type);
532 
533 #ifndef DOXYGEN_SHOULD_SKIP_THIS
534 
/* Plain record of raw buffers and sizes for SVM value bookkeeping. Nothing
 * in this header reads or writes it, so the field notes below are inferred
 * from the names only -- NOTE(review): confirm against DynProg.cpp. */
535  struct svm_values_struct
536  {
/* presumably the maximal look-back distance -- TODO confirm */
538  int32_t maxlookback;
/* presumably the sequence length -- TODO confirm */
540  int32_t seqlen;
541 
/* raw pointer; ownership and element count are not visible here */
543  int32_t* start_pos;
/* two-level raw pointer; dimensions are not visible here */
545  float64_t ** svm_values_unnormalized;
/* raw pointer; ownership and element count are not visible here */
547  float64_t * svm_values;
/* three-level raw pointer; dimensions are not visible here */
549  bool *** word_used;
/* two-level raw pointer; dimensions are not visible here */
551  int32_t **num_unique_words;
552  };
553 #endif // DOXYGEN_SHOULD_SKIP_THIS
554 
563  bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to);
564 
/** @return the constant string "DynProg" as the name of this object */
566  inline virtual const char* get_name() const { return "DynProg"; }
567 
568 private:
569 
570  T_STATES trans_list_len;
571  T_STATES **trans_list_forward;
572  T_STATES *trans_list_forward_cnt;
573  float64_t **trans_list_forward_val;
574  int32_t **trans_list_forward_id;
575  bool mem_initialized;
576 
577 #ifdef DYNPROG_TIMING
578  CTime MyTime;
579  CTime MyTime2;
580  CTime MyTime3;
581 
582  float64_t segment_init_time;
583  float64_t segment_pos_time;
584  float64_t segment_clean_time;
585  float64_t segment_extend_time;
586  float64_t orf_time;
587  float64_t content_time;
588  float64_t content_penalty_time;
589  float64_t content_svm_values_time ;
590  float64_t content_plifs_time ;
591  float64_t svm_init_time;
592  float64_t svm_pos_time;
593  float64_t inner_loop_time;
594  float64_t inner_loop_max_time ;
595  float64_t svm_clean_time;
596  float64_t long_transition_time ;
597 #endif
598 
599 
600 protected:
605  int32_t m_N;
607 
612 
616 
620 
622 
624  int32_t m_num_degrees;
626  int32_t m_num_svms;
627 
650 
652 // CArray<int32_t> m_svm_pos_start;
658  int32_t m_max_a_id;
659 
660  // input arguments
666  int32_t m_seq_len;
693  uint16_t*** m_wordstr;
710 
714 
715  // output arguments
722 
729 
734 
738 
741 
747 
751  int32_t* m_probe_pos;
757  int32_t m_num_raw_data;
758 
768  //int32_t m_long_transition_max ;
769 
773  static int32_t word_degree_default[4];
774 
778  static int32_t cum_num_words_default[5];
779 
782  static int32_t frame_plifs[3];
783 
786  static int32_t num_words_default[4];
787 
789  static int32_t mod_words_default[32];
790 
792  static bool sign_words_default[16];
793 
795  static int32_t string_words_default[16];
796 };
797 }
798 #endif
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:25
CArray2< int32_t > m_orf_info
Definition: DynProg.h:668
bool m_svm_arrays_clean
Definition: DynProg.h:656
T * get_array()
Definition: Array2.h:107
CPlifMatrix * m_plif_matrices
Definition: DynProg.h:728
float64_t get_a(T_STATES line_, T_STATES column) const
Definition: DynProg.h:446
CArray< int32_t > m_my_state_seq
Definition: DynProg.h:703
void best_path_trans_deriv(int32_t *my_state_seq, int32_t *my_pos_seq, int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
Definition: DynProg.cpp:2053
T_STATES * P_STATES
Definition: HMM.h:66
uint16_t *** m_wordstr
Definition: DynProg.h:693
static int32_t cum_num_words_default[5]
Definition: DynProg.h:778
CArray< int32_t > m_segment_ids
Definition: DynProg.h:699
void set_lin_feat(float64_t *p_lin_feat, int32_t p_num_svms, int32_t p_seq_len)
Definition: DynProg.h:426
virtual ~CDynProg()
Definition: DynProg.cpp:147
CArray< int32_t > m_my_pos_seq
Definition: DynProg.h:705
void set_dict_weights(SGMatrix< float64_t > dictionary_weights)
Definition: DynProg.cpp:784
void set_my_state_seq(int32_t *my_state_seq)
Definition: DynProg.cpp:768
CArray< char > m_genestr
Definition: DynProg.h:678
void set_plif_matrices(CPlifMatrix *pm)
Definition: DynProg.cpp:751
CArray2< int32_t > m_states
Definition: DynProg.h:719
void create_word_string()
Definition: DynProg.cpp:366
int32_t m_N
number of states
Definition: DynProg.h:606
CArray< CPlifBase * > m_plif_list
Definition: DynProg.h:672
T_STATES get_N() const
access function for number of states N
Definition: DynProg.h:328
void set_observation_matrix(SGNDArray< float64_t > seq)
Definition: DynProg.cpp:662
float64_t get_a_deriv(T_STATES line_, T_STATES column) const
Definition: DynProg.h:457
CArray< float64_t > m_end_state_distribution_q_deriv
Definition: DynProg.h:619
bool m_long_transitions
Definition: DynProg.h:760
CArray2< int32_t > m_positions
Definition: DynProg.h:721
CArray< float64_t > m_segment_mask
Definition: DynProg.h:701
int32_t m_max_a_id
Definition: DynProg.h:658
const T & element(int32_t idx1, int32_t idx2) const
Definition: Array2.h:179
void set_gene_string(SGVector< char > genestr)
Definition: DynProg.cpp:760
void set_orf_info(SGMatrix< int32_t > orf_info)
Definition: DynProg.cpp:728
void get_path_scores(float64_t **my_scores, int32_t *seq_len)
Definition: DynProg.cpp:868
bool check_svm_arrays()
Definition: DynProg.cpp:606
CArray< float64_t > m_end_state_distribution_q
distribution of end-states
Definition: DynProg.h:618
int32_t * m_cum_num_words_array
Definition: DynProg.h:633
SGVector< float64_t > get_scores()
Definition: DynProg.cpp:840
CArray2< CPlifBase * > m_PEN_state_signals
Definition: DynProg.h:676
CArray2< int32_t > m_transition_matrix_a_id
transition matrix
Definition: DynProg.h:609
float64_t get_q(T_STATES offset) const
Definition: DynProg.h:367
int32_t * m_mod_words_array
Definition: DynProg.h:641
int32_t get_num_svms()
Definition: DynProg.cpp:195
void get_array_size(int32_t &dim1, int32_t &dim2)
Definition: Array2.h:73
class SegmentLoss
Definition: SegmentLoss.h:26
float64_t get_p(T_STATES offset) const
Definition: DynProg.h:387
int32_t raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t *intensities, int32_t type)
Definition: DynProg.cpp:2455
CArray< int32_t > m_num_unique_words
Definition: DynProg.h:654
int32_t m_num_intron_plifs
Definition: DynProg.h:740
void best_path_set_segment_ids_mask(int32_t *segment_ids, float64_t *segment_mask, int32_t m)
Definition: DynProg.cpp:820
void set_pos(SGVector< int32_t > pos)
Definition: DynProg.cpp:722
int32_t m_seq_len
Definition: DynProg.h:666
void set_array(T *p_array, int32_t dim1, int32_t dim2, bool p_free_array=true, bool copy_array=false)
Definition: Array2.h:126
int32_t get_num_positions()
Definition: DynProg.cpp:684
CArray< float64_t > m_initial_state_distribution_p_deriv
Definition: DynProg.h:615
CDynProg(int32_t p_num_svms=8)
Definition: DynProg.cpp:48
CArray2< float64_t > m_dict_weights
Definition: DynProg.h:695
CArray3< float64_t > m_observation_matrix
Definition: DynProg.h:662
int32_t m_num_degrees
Definition: DynProg.h:624
CSparseFeatures< float64_t > * m_seq_sparse1
Definition: DynProg.h:724
CSparseFeatures< float64_t > * m_seq_sparse2
Definition: DynProg.h:726
class Plif
Definition: Plif.h:37
int32_t * m_num_probes_cum
Definition: DynProg.h:753
CArray< bool > m_genestr_stop
Definition: DynProg.h:733
CArray< float64_t > m_my_scores
Definition: DynProg.h:707
void precompute_content_values()
Definition: DynProg.cpp:397
int32_t * m_num_lin_feat_plifs_cum
Definition: DynProg.h:755
CArray< float64_t > m_initial_state_distribution_p
initial distribution of states
Definition: DynProg.h:614
class IntronList
Definition: IntronList.h:20
int32_t find_frame(const int32_t from_state)
void init_mod_words_array(SGMatrix< int32_t > p_mod_words_array)
Definition: DynProg.cpp:584
void set_a(SGMatrix< float64_t > a)
Definition: DynProg.cpp:462
void set_p(T_STATES offset, float64_t value)
Definition: DynProg.h:346
void set_intron_list(CIntronList *intron_list, int32_t num_plifs)
Definition: DynProg.cpp:2534
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:76
void set_q_vector(SGVector< float64_t > q)
Definition: DynProg.cpp:455
CArray2< float64_t > m_segment_sum_weights
Definition: DynProg.h:670
CArray< int32_t > m_word_degree
Definition: DynProg.h:629
CArray< int32_t > m_string_words
Definition: DynProg.h:647
CArray< float64_t > m_scores
Definition: DynProg.h:717
static int32_t mod_words_default[32]
Definition: DynProg.h:789
CArray2< CPlifBase * > m_PEN
Definition: DynProg.h:674
void resize_lin_feat(int32_t num_new_feat)
Definition: DynProg.cpp:284
void precompute_stop_codons()
Definition: DynProg.cpp:200
void lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t *svm_values, int32_t frame)
Definition: DynProg.cpp:2481
double float64_t
Definition: common.h:56
CArray2< int32_t > m_mod_words
Definition: DynProg.h:639
SGMatrix< int32_t > get_positions()
Definition: DynProg.cpp:858
int32_t * m_string_words_array
Definition: DynProg.h:649
SGMatrix< int32_t > get_states()
Definition: DynProg.cpp:848
static int32_t frame_plifs[3]
Definition: DynProg.h:782
void best_path_set_segment_loss(SGMatrix< float64_t > segment_loss)
Definition: DynProg.cpp:803
void set_content_type_array(SGMatrix< float64_t > seg_path)
Definition: DynProg.cpp:689
void precompute_tiling_plifs(CPlif **PEN, const int32_t *tiling_plif_ids, const int32_t num_tiling_plifs)
Definition: DynProg.cpp:317
int32_t m_long_transition_threshold
Definition: DynProg.h:763
float64_t get_q_deriv(T_STATES offset) const
Definition: DynProg.h:377
float64_t * get_lin_feat(int32_t &dim1, int32_t &dim2)
Definition: DynProg.h:413
void set_a(T_STATES line_, T_STATES column, float64_t value)
Definition: DynProg.h:357
void set_a_trans_matrix(SGMatrix< float64_t > a_trans)
Definition: DynProg.cpp:483
CArray2< float64_t > m_transition_matrix_a_deriv
Definition: DynProg.h:611
float64_t * m_raw_intensities
Definition: DynProg.h:749
void set_q(T_STATES offset, float64_t value)
Definition: DynProg.h:337
CArray< int32_t > m_num_words
Definition: DynProg.h:635
void set_my_pos_seq(int32_t *my_pos_seq)
Definition: DynProg.cpp:776
uint8_t T_STATES
Definition: HMM.h:64
float64_t get_p_deriv(T_STATES offset) const
Definition: DynProg.h:397
#define SG_DEBUG(...)
Definition: SGIO.h:72
CArray< int32_t > m_pos
Definition: DynProg.h:664
int32_t get_num_states()
Definition: DynProg.cpp:239
int32_t * m_probe_pos
Definition: DynProg.h:751
Dynamic Programming Class.
Definition: DynProg.h:77
CArray3< float64_t > m_segment_loss
Definition: DynProg.h:697
int32_t * m_num_words_array
Definition: DynProg.h:637
void set_p_vector(SGVector< float64_t > p)
Definition: DynProg.cpp:447
CArray2< float64_t > m_lin_feat
Definition: DynProg.h:746
void get_path_losses(float64_t **my_losses, int32_t *seq_len)
Definition: DynProg.cpp:882
CIntronList * m_intron_list
Definition: DynProg.h:737
int32_t m_num_raw_data
Definition: DynProg.h:757
void init_tiling_data(int32_t *probe_pos, float64_t *intensities, const int32_t num_probes)
Definition: DynProg.cpp:244
virtual const char * get_name() const
Definition: DynProg.h:566
CArray< bool > m_sign_words
Definition: DynProg.h:643
int32_t m_num_svms
Definition: DynProg.h:626
static bool sign_words_default[16]
Definition: DynProg.h:792
static int32_t word_degree_default[4]
Definition: DynProg.h:773
void set_num_states(int32_t N)
Definition: DynProg.cpp:223
void long_transition_settings(bool use_long_transitions, int32_t threshold, int32_t max_len)
Definition: DynProg.h:481
CSegmentLoss * m_seg_loss_obj
Definition: DynProg.h:713
CSegmentLoss * get_segment_loss_object()
Definition: DynProg.h:470
shogun n-dimensional array
Definition: DataType.h:374
bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to)
Definition: DynProg.cpp:898
static int32_t string_words_default[16]
Definition: DynProg.h:795
static int32_t num_words_default[4]
Definition: DynProg.h:786
CArray< int32_t > m_cum_num_words
Definition: DynProg.h:631
CArray2< float64_t > m_transition_matrix_a
Definition: DynProg.h:610
CArray< float64_t > m_my_losses
Definition: DynProg.h:709
void init_content_svm_value_array(const int32_t p_num_svms)
Definition: DynProg.cpp:274
void compute_nbest_paths(int32_t max_num_signals, bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences)
Definition: DynProg.cpp:949
void lookup_tiling_plif_values(const int32_t from_state, const int32_t to_state, const int32_t len, float64_t *svm_values)
store plif arrays for all transitions in the model
Definition: PlifMatrix.h:28
bool * m_sign_words_array
Definition: DynProg.h:645
void set_sparse_features(CSparseFeatures< float64_t > *seq_sparse1, CSparseFeatures< float64_t > *seq_sparse2)
Definition: DynProg.cpp:737
void set_a_id(SGMatrix< int32_t > a)
Definition: DynProg.cpp:470

SHOGUN Machine Learning Toolbox - Documentation