25 #define VAL_MACRO log((default_value == 0) ? (CMath::random(MIN_RAND, MAX_RAND)) : default_value)
26 #define ARRAY_SIZE 65336
28 using namespace shogun;
75 const char Model::FIX_DISALLOWED=0 ;
76 const char Model::FIX_ALLOWED=1 ;
77 const char Model::FIX_DEFAULT=-1 ;
116 fix_pos_state[i] = FIX_DEFAULT ;
152 :
CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
154 #ifdef USE_HMMPARALLEL_STRUCTURES
166 :
CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
172 #ifdef USE_HMMPARALLEL_STRUCTURES
182 :
CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
188 #ifdef USE_HMMPARALLEL_STRUCTURES
197 :
CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
203 trans_list_forward = NULL ;
204 trans_list_forward_cnt = NULL ;
205 trans_list_forward_val = NULL ;
206 trans_list_backward = NULL ;
207 trans_list_backward_cnt = NULL ;
209 mem_initialized = false ;
220 #ifdef USE_HMMPARALLEL_STRUCTURES
236 mem_initialized = true ;
251 :
CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
258 trans_list_forward = NULL ;
259 trans_list_forward_cnt = NULL ;
260 trans_list_forward_val = NULL ;
261 trans_list_backward = NULL ;
262 trans_list_backward_cnt = NULL ;
264 mem_initialized = false ;
275 #ifdef USE_HMMPARALLEL_STRUCTURES
291 mem_initialized = true ;
293 trans_list_forward_cnt=NULL ;
300 for (int32_t j=0; j<
N; j++)
302 int32_t old_start_idx=start_idx;
304 while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
308 if (start_idx>1 && start_idx<num_trans)
309 ASSERT(a_trans[start_idx+num_trans-1]<=
310 a_trans[start_idx+num_trans]);
313 if (start_idx>1 && start_idx<num_trans)
314 ASSERT(a_trans[start_idx+num_trans-1]<=
315 a_trans[start_idx+num_trans]);
317 int32_t len=start_idx-old_start_idx;
320 trans_list_forward_cnt[j] = 0 ;
329 trans_list_forward[j] = NULL;
330 trans_list_forward_val[j] = NULL;
334 for (int32_t i=0; i<num_trans; i++)
336 int32_t from = (int32_t)a_trans[i+num_trans] ;
337 int32_t to = (int32_t)a_trans[i] ;
340 ASSERT(from>=0 && from<N);
343 trans_list_forward[from][trans_list_forward_cnt[from]]=to ;
344 trans_list_forward_val[from][trans_list_forward_cnt[from]]=val ;
345 trans_list_forward_cnt[from]++ ;
361 :
CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
363 #ifdef USE_HMMPARALLEL_STRUCTURES
374 if (trans_list_forward_cnt)
375 SG_FREE(trans_list_forward_cnt);
376 if (trans_list_backward_cnt)
377 SG_FREE(trans_list_backward_cnt);
378 if (trans_list_forward)
380 for (int32_t i=0; i<trans_list_len; i++)
381 if (trans_list_forward[i])
382 SG_FREE(trans_list_forward[i]);
385 if (trans_list_forward_val)
387 for (int32_t i=0; i<trans_list_len; i++)
388 if (trans_list_forward_val[i])
389 SG_FREE(trans_list_forward_val[i]);
390 SG_FREE(trans_list_forward_val);
392 if (trans_list_backward)
394 for (int32_t i=0; i<trans_list_len; i++)
395 if (trans_list_backward[i])
396 SG_FREE(trans_list_backward[i]);
404 #ifdef USE_HMMPARALLEL_STRUCTURES
417 #else // USE_HMMPARALLEL_STRUCTURES
422 #endif // USE_HMMPARALLEL_STRUCTURES
428 #ifdef USE_LOGSUMARRAY
429 #ifdef USE_HMMPARALLEL_STRUCTURES
435 #else //USE_HMMPARALLEL_STRUCTURES
437 #endif //USE_HMMPARALLEL_STRUCTURES
438 #endif //USE_LOGSUMARRAY
442 #ifdef USE_HMMPARALLEL_STRUCTURES
447 #endif //USE_HMMPARALLEL_STRUCTURES
459 SG_ERROR(
"Expected features of class string type word\n");
480 #ifdef USE_HMMPARALLEL_STRUCTURES
486 #else //USE_HMMPARALLEL_STRUCTURES
489 #endif //USE_HMMPARALLEL_STRUCTURES
492 #ifdef USE_HMMPARALLEL_STRUCTURES
495 #else //USE_HMMPARALLEL_STRUCTURES
497 #endif //USE_HMMPARALLEL_STRUCTURES
498 #endif //LOG_SUMARRAY
504 #ifdef USE_HMMPARALLEL_STRUCTURES
506 #else //USE_HMMPARALLEL_STRUCTURES
508 #endif //USE_HMMPARALLEL_STRUCTURES
525 #ifdef USE_HMMPARALLEL_STRUCTURES
563 trans_list_forward = NULL ;
564 trans_list_forward_cnt = NULL ;
565 trans_list_forward_val = NULL ;
566 trans_list_backward = NULL ;
567 trans_list_backward_cnt = NULL ;
569 mem_initialized = false ;
580 #ifdef USE_HMMPARALLEL_STRUCTURES
588 this->beta_cache[i].table=NULL;
590 this->beta_cache[i].dimension=0;
591 this->states_per_observation_psi[i]=NULL ;
594 #else // USE_HMMPARALLEL_STRUCTURES
596 this->beta_cache.table=NULL;
598 this->beta_cache.dimension=0;
599 this->states_per_observation_psi=NULL ;
600 #endif //USE_HMMPARALLEL_STRUCTURES
605 #ifdef USE_HMMPARALLEL_STRUCTURES
614 #else // USE_HMMPARALLEL_STRUCTURES
617 #endif //USE_HMMPARALLEL_STRUCTURES
619 #ifdef USE_HMMPARALLEL_STRUCTURES
622 #endif //USE_HMMPARALLEL_STRUCTURES
625 #ifdef USE_HMMPARALLEL_STRUCTURES
627 #endif // USE_HMMPARALLEL_STRUCTURES
628 #endif //LOG_SUMARRAY
633 mem_initialized = true ;
636 return ((files_ok) &&
656 int32_t wanted_time=time;
658 if (ALPHA_CACHE(dimension).table)
660 alpha=&ALPHA_CACHE(dimension).table[0];
661 alpha_new=&ALPHA_CACHE(dimension).table[
N];
675 for (int32_t i=0; i<
N; i++)
682 for (int32_t j=0; j<
N; j++)
684 register int32_t i, num = trans_list_forward_cnt[j] ;
686 for (i=0; i < num; i++)
688 int32_t ii = trans_list_forward[j][i] ;
695 if (!ALPHA_CACHE(dimension).table)
709 if (time<p_observations->get_vector_length(dimension))
711 register int32_t i, num=trans_list_forward_cnt[state];
713 for (i=0; i<num; i++)
715 int32_t ii = trans_list_forward[state][i] ;
730 if (!ALPHA_CACHE(dimension).table)
734 ALPHA_CACHE(dimension).dimension=dimension;
735 ALPHA_CACHE(dimension).updated=
true;
736 ALPHA_CACHE(dimension).sum=sum;
738 if (wanted_time<p_observations->get_vector_length(dimension))
739 return ALPHA_CACHE(dimension).table[wanted_time*N+state];
741 return ALPHA_CACHE(dimension).sum;
759 int32_t wanted_time=time;
761 if (ALPHA_CACHE(dimension).table)
763 alpha=&ALPHA_CACHE(dimension).table[0];
764 alpha_new=&ALPHA_CACHE(dimension).table[
N];
778 for (int32_t i=0; i<
N; i++)
785 for (int32_t j=0; j<
N; j++)
788 #ifdef USE_LOGSUMARRAY
789 for (i=0; i<(N>>1); i++)
791 alpha[(i<<1)+1] +
get_a((i<<1)+1,j));
795 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
798 #else //USE_LOGSUMARRAY
804 #endif //USE_LOGSUMARRAY
807 if (!ALPHA_CACHE(dimension).table)
821 if (time<p_observations->get_vector_length(dimension))
824 #ifdef USE_LOGSUMARRAY
825 for (i=0; i<(N>>1); i++)
827 alpha[(i<<1)+1] +
get_a((i<<1)+1,state));
831 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
834 #else //USE_LOGSUMARRAY
840 #endif //USE_LOGSUMARRAY
847 #ifdef USE_LOGSUMARRAY
848 for (i=0; i<(N>>1); i++)
850 alpha[(i<<1)+1] +
get_q((i<<1)+1));
853 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
855 sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
856 #else //USE_LOGSUMARRAY
860 #endif //USE_LOGSUMARRAY
862 if (!ALPHA_CACHE(dimension).table)
866 ALPHA_CACHE(dimension).dimension=dimension;
867 ALPHA_CACHE(dimension).updated=
true;
868 ALPHA_CACHE(dimension).sum=sum;
870 if (wanted_time<p_observations->get_vector_length(dimension))
871 return ALPHA_CACHE(dimension).table[wanted_time*N+state];
873 return ALPHA_CACHE(dimension).sum;
888 int32_t wanted_time=time;
891 forward(time, state, dimension);
893 if (BETA_CACHE(dimension).table)
912 for (
register int32_t i=0; i<
N; i++)
918 for (
register int32_t i=0; i<
N; i++)
920 register int32_t j, num=trans_list_backward_cnt[i] ;
922 for (j=0; j<num; j++)
924 int32_t jj = trans_list_backward[i][j] ;
930 if (!BETA_CACHE(dimension).table)
945 register int32_t j, num=trans_list_backward_cnt[state] ;
947 for (j=0; j<num; j++)
949 int32_t jj = trans_list_backward[state][j] ;
956 if (BETA_CACHE(dimension).table)
959 for (
register int32_t j=0; j<
N; j++)
961 BETA_CACHE(dimension).sum=sum;
962 BETA_CACHE(dimension).dimension=dimension;
963 BETA_CACHE(dimension).updated=
true;
965 if (wanted_time<p_observations->get_vector_length(dimension))
966 return BETA_CACHE(dimension).table[wanted_time*N+state];
968 return BETA_CACHE(dimension).sum;
973 for (
register int32_t j=0; j<
N; j++)
983 int32_t time, int32_t state, int32_t dimension)
988 int32_t wanted_time=time;
991 forward(time, state, dimension);
993 if (BETA_CACHE(dimension).table)
1008 return get_q(state);
1012 for (
register int32_t i=0; i<
N; i++)
1018 for (
register int32_t i=0; i<
N; i++)
1020 register int32_t j ;
1021 #ifdef USE_LOGSUMARRAY
1022 for (j=0; j<(N>>1); j++)
1028 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
1030 beta_new[i]=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
1031 #else //USE_LOGSUMARRAY
1037 #endif //USE_LOGSUMARRAY
1040 if (!BETA_CACHE(dimension).table)
1055 register int32_t j ;
1056 #ifdef USE_LOGSUMARRAY
1057 for (j=0; j<(N>>1); j++)
1063 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
1065 return CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
1066 #else //USE_LOGSUMARRAY
1072 #endif //USE_LOGSUMARRAY
1076 if (BETA_CACHE(dimension).table)
1078 #ifdef USE_LOGSUMARRAY//AAA
1079 for (int32_t j=0; j<(N>>1); j++)
1084 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
1086 BETA_CACHE(dimension).sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
1087 #else //USE_LOGSUMARRAY
1089 for (
register int32_t j=0; j<
N; j++)
1091 BETA_CACHE(dimension).sum=sum;
1092 #endif //USE_LOGSUMARRAY
1093 BETA_CACHE(dimension).dimension=dimension;
1094 BETA_CACHE(dimension).updated=
true;
1096 if (wanted_time<p_observations->get_vector_length(dimension))
1097 return BETA_CACHE(dimension).table[wanted_time*N+state];
1099 return BETA_CACHE(dimension).sum;
1104 for (
register int32_t j=0; j<
N; j++)
1123 SG_INFO(
"computing full viterbi likelihood\n") ;
1135 if (!STATES_PER_OBSERVATION_PSI(dimension))
1141 if (PATH_PROB_UPDATED(dimension) && dimension==PATH_PROB_DIMENSION(dimension))
1145 register float64_t* delta= ARRAYN2(dimension);
1146 register float64_t* delta_new= ARRAYN1(dimension);
1149 for (
register int32_t i=0; i<
N; i++)
1156 #ifdef USE_PATHDEBUG
1163 register int32_t NN=
N ;
1164 for (
register int32_t j=0; j<NN; j++)
1167 register float64_t maxj=delta[0] + matrix_a[0];
1168 register int32_t argmax=0;
1170 for (
register int32_t i=1; i<NN; i++)
1172 register float64_t temp = delta[i] + matrix_a[i];
1181 if ((!
model) || (
model->get_fix_pos_state(t,j,NN)!=Model::FIX_DISALLOWED))
1188 set_psi(t, j, argmax, dimension);
1191 #ifdef USE_PATHDEBUG
1193 for (int32_t jj=0; jj<
N; jj++)
1194 if (delta_new[jj]>best)
1195 best=delta_new[jj] ;
1199 SG_DEBUG(
"worst case at %i: %e:%e\n", t, best, worst) ;
1212 register int32_t argmax=0;
1214 for (
register int32_t i=1; i<
N; i++)
1232 PATH(dimension)[t-1]=
get_psi(t, PATH(dimension)[t], dimension);
1235 PATH_PROB_UPDATED(dimension)=
true;
1236 PATH_PROB_DIMENSION(dimension)=dimension;
1241 #ifndef USE_HMMPARALLEL
1260 SG_INFO(
"computing full model probablity\n");
1265 params[cpu].hmm=this ;
1272 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (
void*)¶ms[cpu]);
1277 pthread_join(threads[cpu], NULL);
1296 void* CHMM::bw_dim_prefetch(
void* params)
1298 CHMM* hmm=((S_BW_THREAD_PARAM*) params)->hmm;
1299 int32_t start=((S_BW_THREAD_PARAM*) params)->dim_start;
1300 int32_t stop=((S_BW_THREAD_PARAM*) params)->dim_stop;
1301 float64_t* p_buf=((S_BW_THREAD_PARAM*) params)->p_buf;
1302 float64_t* q_buf=((S_BW_THREAD_PARAM*) params)->q_buf;
1303 float64_t* a_buf=((S_BW_THREAD_PARAM*) params)->a_buf;
1304 float64_t* b_buf=((S_BW_THREAD_PARAM*) params)->b_buf;
1305 ((S_BW_THREAD_PARAM*)params)->ret=0;
1307 for (int32_t dim=start; dim<stop; dim++)
1312 hmm->ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
1313 ((S_BW_THREAD_PARAM*)params)->ret+= modprob;
1318 void* CHMM::bw_single_dim_prefetch(
void * params)
1320 CHMM* hmm=((S_BW_THREAD_PARAM*)params)->hmm ;
1321 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
1326 void* CHMM::vit_dim_prefetch(
void * params)
1328 CHMM* hmm=((S_DIM_THREAD_PARAM*)params)->hmm ;
1329 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
1330 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->
best_path(dim);
1334 #endif //USE_HMMPARALLEL
1337 #ifdef USE_HMMPARALLEL
1339 void CHMM::ab_buf_comp(
1365 a_buf[N*i+j]=a_sum-dimmodprob ;
1379 b_buf[M*i+j]=b_sum-dimmodprob ;
1417 pthread_t *threads=
SG_MALLOC(pthread_t, num_threads);
1418 S_BW_THREAD_PARAM *params=
SG_MALLOC(S_BW_THREAD_PARAM, num_threads);
1423 for (cpu=0; cpu<num_threads; cpu++)
1430 params[cpu].hmm=hmm;
1438 params[cpu].dim_start=start;
1439 params[cpu].dim_stop=stop;
1441 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, ¶ms[cpu]);
1444 for (cpu=0; cpu<num_threads; cpu++)
1446 pthread_join(threads[cpu], NULL);
1463 fullmodprob+=params[cpu].ret;
1467 for (cpu=0; cpu<num_threads; cpu++)
1487 #else // USE_HMMPARALLEL
1526 fullmodprob+=dimmodprob ;
1534 int32_t num = trans_list_backward_cnt[i] ;
1537 for (j=0; j<num; j++)
1539 int32_t jj = trans_list_backward[i][j] ;
1612 fullmodprob+=dimmodprob ;
1657 #endif // USE_HMMPARALLEL
1694 fullmodprob+=dimmodprob ;
1702 int32_t num = trans_list_backward_cnt[i] ;
1704 for (j=0; j<num; j++)
1706 int32_t jj = trans_list_backward[i][j] ;
1733 int32_t i,j,old_i,k,t,dim;
1767 #ifdef USE_HMMPARALLEL
1769 pthread_t *threads=
SG_MALLOC(pthread_t, num_threads);
1770 S_DIM_THREAD_PARAM *params=
SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
1779 #ifdef USE_HMMPARALLEL
1780 if (dim%num_threads==0)
1782 for (i=0; i<num_threads; i++)
1784 if (dim+i<p_observations->get_num_vectors())
1786 params[i].hmm=estimate ;
1787 params[i].dim=dim+i ;
1788 pthread_create(&threads[i], NULL, bw_single_dim_prefetch, (
void*)¶ms[i]) ;
1791 for (i=0; i<num_threads; i++)
1793 if (dim+i<p_observations->get_num_vectors())
1795 pthread_join(threads[i], NULL);
1796 dimmodprob = params[i].prob_sum;
1802 #endif // USE_HMMPARALLEL
1805 fullmodprob+= dimmodprob;
1872 #ifdef USE_HMMPARALLEL
1931 #ifdef USE_HMMPARALLEL
1933 pthread_t *threads=
SG_MALLOC(pthread_t, num_threads);
1934 S_DIM_THREAD_PARAM *params=
SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
1943 #ifdef USE_HMMPARALLEL
1944 if (dim%num_threads==0)
1946 for (i=0; i<num_threads; i++)
1948 if (dim+i<p_observations->get_num_vectors())
1950 params[i].hmm=estimate ;
1951 params[i].dim=dim+i ;
1952 pthread_create(&threads[i], NULL, vit_dim_prefetch, (
void*)¶ms[i]) ;
1955 for (i=0; i<num_threads; i++)
1957 if (dim+i<p_observations->get_num_vectors())
1959 pthread_join(threads[i], NULL);
1960 allpatprob += params[i].prob_sum;
1967 #endif // USE_HMMPARALLEL
1972 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1],
get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
1978 P[estimate->PATH(dim)[0]]++;
1982 #ifdef USE_HMMPARALLEL
2019 set_p(i, log(P[i]/sum));
2027 set_q(i, log(Q[i]/sum));
2056 #ifdef USE_HMMPARALLEL
2058 pthread_t *threads=
SG_MALLOC(pthread_t, num_threads);
2059 S_DIM_THREAD_PARAM *params=
SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
2066 #ifdef USE_HMMPARALLEL
2067 if (dim%num_threads==0)
2069 for (i=0; i<num_threads; i++)
2071 if (dim+i<p_observations->get_num_vectors())
2073 params[i].hmm=estimate ;
2074 params[i].dim=dim+i ;
2075 pthread_create(&threads[i], NULL, vit_dim_prefetch, (
void*)¶ms[i]) ;
2078 for (i=0; i<num_threads; i++)
2080 if (dim+i<p_observations->get_num_vectors())
2082 pthread_join(threads[i], NULL);
2083 allpatprob += params[i].prob_sum;
2087 #else // USE_HMMPARALLEL
2090 #endif // USE_HMMPARALLEL
2096 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1],
get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
2102 P[estimate->PATH(dim)[0]]++;
2106 #ifdef USE_HMMPARALLEL
2222 SG_INFO(
"log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
2229 SG_INFO(
"\ntransition matrix\n");
2242 if (fabs(checksum)>1e-5)
2243 SG_DEBUG(
" checksum % E ******* \n",checksum);
2245 SG_DEBUG(
" checksum % E\n",checksum);
2249 SG_INFO(
"\ndistribution of start states\n");
2258 if (fabs(checksum)>1e-5)
2259 SG_DEBUG(
" checksum % E ******* \n",checksum);
2261 SG_DEBUG(
" checksum=% E\n", checksum);
2264 SG_INFO(
"\ndistribution of terminal states\n");
2273 if (fabs(checksum)>1e-5)
2274 SG_DEBUG(
" checksum % E ******* \n",checksum);
2276 SG_DEBUG(
" checksum=% E\n", checksum);
2279 SG_INFO(
"\ndistribution of observations given the state\n");
2290 if (fabs(checksum)>1e-5)
2291 SG_DEBUG(
" checksum % E ******* \n",checksum);
2293 SG_DEBUG(
" checksum % E\n",checksum);
2307 SG_INFO(
"log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
2314 SG_INFO(
"\ntransition matrix\n");
2332 SG_INFO(
"\n\ndistribution of observations given the state\n");
2683 if (mem_initialized)
2685 if (trans_list_forward_cnt)
2686 SG_FREE(trans_list_forward_cnt);
2687 trans_list_forward_cnt=NULL ;
2688 if (trans_list_backward_cnt)
2689 SG_FREE(trans_list_backward_cnt);
2690 trans_list_backward_cnt=NULL ;
2691 if (trans_list_forward)
2693 for (int32_t i=0; i<trans_list_len; i++)
2694 if (trans_list_forward[i])
2695 SG_FREE(trans_list_forward[i]);
2697 trans_list_forward=NULL ;
2699 if (trans_list_backward)
2701 for (int32_t i=0; i<trans_list_len; i++)
2702 if (trans_list_backward[i])
2703 SG_FREE(trans_list_backward[i]);
2705 trans_list_backward = NULL ;
2708 trans_list_len =
N ;
2712 for (int32_t j=0; j<
N; j++)
2714 trans_list_forward_cnt[j]= 0 ;
2716 for (int32_t i=0; i<
N; i++)
2719 trans_list_forward[j][trans_list_forward_cnt[j]]=i ;
2720 trans_list_forward_cnt[j]++ ;
2727 for (int32_t i=0; i<
N; i++)
2729 trans_list_backward_cnt[i]= 0 ;
2731 for (int32_t j=0; j<
N; j++)
2734 trans_list_backward[i][trans_list_backward_cnt[i]]=j ;
2735 trans_list_backward_cnt[i]++ ;
2745 #ifdef USE_HMMPARALLEL_STRUCTURES
2755 #else // USE_HMMPARALLEL_STRUCTURES
2761 #endif // USE_HMMPARALLEL_STRUCTURES
2767 while (((value=fgetc(file)) != EOF) && (value!=
'['))
2774 error(
line,
"expected \"[\" in input file");
2776 while (((value=fgetc(file)) != EOF) && (isspace(value)))
2782 ungetc(value, file);
2788 while (((value=fgetc(file)) != EOF) && (value!=
']'))
2795 error(
line,
"expected \"]\" in input file");
2801 while (((value=fgetc(file)) != EOF) && (value!=
',') && (value!=
';') && (value!=
']'))
2808 ungetc(value, file);
2809 SG_ERROR(
"found ']' instead of ';' or ','\n") ;
2814 error(
line,
"expected \";\" or \",\" in input file");
2816 while (((value=fgetc(file)) != EOF) && (isspace(value)))
2821 ungetc(value, file);
2829 while (((value=fgetc(file)) != EOF) &&
2830 !isdigit(value) && (value!=
'A')
2831 && (value!=
'C') && (value!=
'G') && (value!=
'T')
2832 && (value!=
'N') && (value!=
'n')
2833 && (value!=
'.') && (value!=
'-') && (value!=
'e') && (value!=
']'))
2840 ungetc(value,file) ;
2864 while (((value=fgetc(file)) != EOF) &&
2865 (isdigit(value) || (value==
'.') || (value==
'-') || (value==
'e')
2866 || (value==
'A') || (value==
'C') || (value==
'G')|| (value==
'T')
2867 || (value==
'N') || (value==
'n')) && (i<length))
2883 case '1':
case '2':
case'3':
case '4':
case'5':
2884 case '6':
case '7':
case'8':
case '9':
case '0': break ;
2885 case '.':
case 'e':
case '-': break ;
2887 SG_ERROR(
"found crap: %i %c (pos:%li)\n",i,value,ftell(file)) ;
2891 ungetc(value, file);
2894 return (i<=length) && (i>0);
2936 int32_t received_params=0;
2949 int32_t value=fgetc(file);
2961 if (received_params &
GOTN)
2962 error(
line,
"in model file: \"p double defined\"");
2966 else if (value==
'M')
2968 if (received_params &
GOTM)
2969 error(
line,
"in model file: \"p double defined\"");
2973 else if (value==
'%')
2980 if (received_params &
GOTp)
2981 error(
line,
"in model file: \"p double defined\"");
2987 if (received_params &
GOTq)
2988 error(
line,
"in model file: \"q double defined\"");
2992 else if (value==
'a')
2994 if (received_params &
GOTa)
2995 error(
line,
"in model file: \"a double defined\"");
2999 else if (value==
'b')
3001 if (received_params &
GOTb)
3002 error(
line,
"in model file: \"b double defined\"");
3006 else if (value==
'%')
3016 this->N= atoi(buffer);
3017 received_params|=
GOTN;
3029 this->M= atoi(buffer);
3030 received_params|=
GOTM;
3044 for (i=0; i<this->
N; i++)
3048 for (j=0; j<this->
N ; j++)
3051 if (fscanf( file,
"%le", &f ) != 1)
3067 received_params|=
GOTa;
3078 for (i=0; i<this->
N; i++)
3082 for (j=0; j<this->
M ; j++)
3085 if (fscanf( file,
"%le", &f ) != 1)
3101 received_params|=
GOTb;
3112 for (i=0; i<this->
N ; i++)
3114 if (fscanf( file,
"%le", &f ) != 1)
3124 received_params|=
GOTp;
3135 for (i=0; i<this->
N ; i++)
3137 if (fscanf( file,
"%le", &f ) != 1)
3147 received_params|=
GOTq;
3154 else if (value==
'\n')
3168 SG_WARNING(
"not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
3238 int32_t received_params=0x0000000;
3265 int32_t value=fgetc(file);
3278 if (fgetc(file)==
'e' && fgetc(file)==
'a' && fgetc(file)==
'r' && fgetc(file)==
'n' && fgetc(file)==
'_')
3295 error(
line,
"a,b,p or q expected in train definition file");
3299 else if (value==
'c')
3301 if (fgetc(file)==
'o' && fgetc(file)==
'n' && fgetc(file)==
's'
3302 && fgetc(file)==
't' && fgetc(file)==
'_')
3319 error(
line,
"a,b,p or q expected in train definition file");
3323 else if (value==
'%')
3327 else if (value==EOF)
3336 bool finished=
false;
3340 SG_DEBUG(
"\nlearn for transition matrix: ") ;
3356 SG_ERROR(
"invalid value for learn_a(%i,0): %i\n",i/2,(
int)value) ;
3374 SG_ERROR(
"invalid value for learn_a(%i,1): %i\n",i/2-1,(
int)value) ;
3383 SG_DEBUG(
"%i Entries",(
int)(i/2)) ;
3399 bool finished=
false;
3403 SG_DEBUG(
"\nlearn for emission matrix: ") ;
3411 for (int32_t j=0; j<2; j++)
3427 SG_ERROR(
"invalid value for learn_b(%i,0): %i\n",i/2,(
int)value) ;
3443 SG_ERROR(
"invalid value for learn_b(%i,1): %i\n",i/2-1,(
int)value) ;
3447 SG_DEBUG(
"%i Entries",(
int)(i/2-1)) ;
3462 bool finished=
false;
3466 SG_DEBUG(
"\nlearn start states: ") ;
3481 SG_ERROR(
"invalid value for learn_p(%i): %i\n",i-1,(
int)value) ;
3506 bool finished=
false;
3510 SG_DEBUG(
"\nlearn terminal states: ") ;
3524 SG_ERROR(
"invalid value for learn_q(%i): %i\n",i-1,(
int)value) ;
3549 bool finished=
false;
3554 SG_DEBUG(
"\nconst for transition matrix: \n") ;
3556 SG_DEBUG(
"\nconst for transition matrix: ") ;
3575 SG_ERROR(
"invalid value for const_a(%i,0): %i\n",i/2,(
int)value) ;
3594 SG_ERROR(
"invalid value for const_a(%i,1): %i\n",i/2-1,(
int)value) ;
3611 if ((dvalue>1.0) || (dvalue<0.0))
3612 SG_ERROR(
"invalid value for const_a_val(%i): %e\n",(
int)i/2-1,dvalue) ;
3625 SG_DEBUG(
"%i Entries",(
int)i/2-1) ;
3641 bool finished=
false;
3646 SG_DEBUG(
"\nconst for emission matrix: \n") ;
3648 SG_DEBUG(
"\nconst for emission matrix: ") ;
3654 for (int32_t j=0; j<3; j++)
3671 SG_ERROR(
"invalid value for const_b(%i,0): %i\n",i/2-1,(
int)value) ;
3682 if ((dvalue>1.0) || (dvalue<0.0))
3683 SG_ERROR(
"invalid value for const_b_val(%i,1): %e\n",i/2-1,dvalue) ;
3707 SG_ERROR(
"invalid value for const_b(%i,1): %i\n",i/2-1, combine) ;
3709 if (verbose && !finished)
3715 SG_ERROR(
"%i Entries",(
int)i/2-1) ;
3730 bool finished=
false;
3735 SG_DEBUG(
"\nconst for start states: \n") ;
3737 SG_DEBUG(
"\nconst for start states: ") ;
3755 SG_ERROR(
"invalid value for const_p(%i): %i\n",i,(
int)value) ;
3773 if ((dvalue>1) || (dvalue<0))
3774 SG_ERROR(
"invalid value for const_p_val(%i): %e\n",i,dvalue) ;
3804 bool finished=
false;
3807 SG_DEBUG(
"\nconst for terminal states: \n") ;
3809 SG_DEBUG(
"\nconst for terminal states: ") ;
3827 SG_ERROR(
"invalid value for const_q(%i): %i\n",i,(
int)value) ;
3844 if ((dvalue>1) || (dvalue<0))
3845 SG_ERROR(
"invalid value for const_q_val(%i): %e\n",i,(
double) dvalue) ;
3873 else if (value==
'\n')
3945 fprintf(file,
"%s",
"% HMM - specification\n% N - number of states\n% M - number of observation_tokens\n% a is state_transition_matrix\n% size(a)= [N,N]\n%\n% b is observation_per_state_matrix\n% size(b)= [N,M]\n%\n% p is initial distribution\n% size(p)= [1, N]\n\n% q is distribution of end states\n% size(q)= [1, N]\n");
3946 fprintf(file,
"N=%d;\n",N);
3947 fprintf(file,
"M=%d;\n",M);
3949 fprintf(file,
"p=[");
3954 fprintf(file,
"%e,", (
double)
get_p(i));
3956 fprintf(file,
"%f,", NAN_REPLACEMENT);
3960 fprintf(file,
"%e", (
double)
get_p(i));
3962 fprintf(file,
"%f", NAN_REPLACEMENT);
3966 fprintf(file,
"];\n\nq=[");
3971 fprintf(file,
"%e,", (
double)
get_q(i));
3973 fprintf(file,
"%f,", NAN_REPLACEMENT);
3977 fprintf(file,
"%e", (
double)
get_q(i));
3979 fprintf(file,
"%f", NAN_REPLACEMENT);
3982 fprintf(file,
"];\n\na=[");
3986 fprintf(file,
"\t[");
3992 fprintf(file,
"%e,", (
double)
get_a(i,j));
3994 fprintf(file,
"%f,", NAN_REPLACEMENT);
3998 fprintf(file,
"%e];\n", (
double)
get_a(i,j));
4000 fprintf(file,
"%f];\n", NAN_REPLACEMENT);
4005 fprintf(file,
" ];\n\nb=[");
4009 fprintf(file,
"\t[");
4015 fprintf(file,
"%e,", (
double)
get_b(i,j));
4017 fprintf(file,
"%f,", NAN_REPLACEMENT);
4021 fprintf(file,
"%e];\n", (
double)
get_b(i,j));
4023 fprintf(file,
"%f];\n", NAN_REPLACEMENT);
4027 result= (fprintf(file,
" ];\n") == 5);
4041 result[i]=PATH(dim)[i];
4057 fprintf(file,
"%i. path probability:%e\nstate sequence:\n", dim, prob);
4059 fprintf(file,
"%d ", PATH(dim)[i]);
4061 fprintf(file,
"\n\n") ;
4079 fwrite(&prob,
sizeof(
float32_t), 1, file);
4093 fprintf(file,
"%% likelihood of model per observation\n%% P[O|model]=[ P[O|model]_1 P[O|model]_2 ... P[O|model]_dim ]\n%%\n");
4095 fprintf(file,
"P=[");
4106 #define FLOATWRITE(file, value) { float32_t rrr=float32_t(value); fwrite(&rrr, sizeof(float32_t), 1, file); num_floats++;}
4110 int32_t i,j,q, num_floats=0 ;
4122 SG_INFO(
"wrote %i parameters for p\n",N) ;
4126 SG_INFO(
"wrote %i parameters for q\n",N) ;
4132 SG_INFO(
"wrote %i parameters for a\n",N*N) ;
4137 SG_INFO(
"wrote %i parameters for b\n",N*M) ;
4156 int32_t num_p, num_q, num_a, num_b ;
4164 SG_INFO(
"wrote %i parameters for p\n",num_p) ;
4169 SG_INFO(
"wrote %i parameters for q\n",num_q) ;
4181 SG_INFO(
"wrote %i parameters for a\n",num_a) ;
4192 SG_INFO(
"wrote %i parameters for b\n",num_b) ;
4216 fprintf(logfile,
"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%% \n%% calculating derivatives of P[O,Q|lambda]=p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
4217 fprintf(logfile,
"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
4218 fprintf(logfile,
"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
4219 fprintf(logfile,
"%% ............................. \n");
4220 fprintf(logfile,
"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
4221 fprintf(logfile,
"%% ];\n%%\n\ndPr(log()) = [\n");
4230 fprintf(logfile,
"[ ");
4248 fseek(logfile,ftell(logfile)-1,SEEK_SET);
4249 fprintf(logfile,
" ];\n");
4252 fprintf(logfile,
"];");
4262 int32_t num_floats=0 ;
4266 SG_WARNING(
"No definitions loaded -- writing derivatives of all weights\n") ;
4268 SG_INFO(
"writing derivatives of changed weights only\n") ;
4344 int32_t num_floats=0 ;
4347 SG_WARNING(
"No definitions loaded -- writing derivatives of all weights\n") ;
4349 SG_INFO(
"writing derivatives of changed weights only\n") ;
4351 #ifdef USE_HMMPARALLEL
4353 pthread_t *threads=
SG_MALLOC(pthread_t, num_threads);
4354 S_DIM_THREAD_PARAM *params=
SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
4368 #ifdef USE_HMMPARALLEL
4369 if (dim%num_threads==0)
4371 for (i=0; i<num_threads; i++)
4373 if (dim+i<p_observations->get_num_vectors())
4375 params[i].hmm=this ;
4376 params[i].dim=dim+i ;
4377 pthread_create(&threads[i], NULL, bw_dim_prefetch, (
void*)¶ms[i]) ;
4381 for (i=0; i<num_threads; i++)
4383 if (dim+i<p_observations->get_num_vectors())
4384 pthread_join(threads[i], NULL);
4414 SG_INFO(
"Number of parameters (including posterior prob.): %i\n", num_floats) ;
4445 SG_INFO(
"Number of parameters (including posterior prob.): %i\n", num_floats) ;
4451 #ifdef USE_HMMPARALLEL
4469 fprintf(file,
"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%%\n%% calculating derivatives of P[O|lambda]=sum_{all Q}p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
4470 fprintf(file,
"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
4471 fprintf(file,
"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
4472 fprintf(file,
"%% ............................. \n");
4473 fprintf(file,
"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
4474 fprintf(file,
"%% ];\n%%\n\nlog(dPr) = [\n");
4479 fprintf(file,
"[ ");
4497 fseek(file,ftell(file)-1,SEEK_SET);
4498 fprintf(file,
" ];\n");
4502 fprintf(file,
"];");
4548 for (int32_t j=0; j<
M; j++)
4552 set_b(i,j, log(exp(old_b)-delta)) ;
4556 set_b(i,j, log(exp(old_b)+delta)) ;
4560 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4570 SG_INFO(
"deriv_calc[%i]=%e\n",dim,deriv_calc) ;
4573 SG_ERROR(
"b(%i,%i)=%e db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j,exp(old_b),i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
4590 for (int32_t j=0; j<
N; j++)
4594 set_a(i,j, log(exp(old_a)-delta)) ;
4598 set_a(i,j, log(exp(old_a)+delta)) ;
4602 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4608 SG_DEBUG(
"da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
4614 for (int32_t j=0; j<
M; j++)
4618 set_b(i,j, log(exp(old_b)-delta)) ;
4622 set_b(i,j, log(exp(old_b)+delta)) ;
4626 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4632 SG_DEBUG(
"db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
4641 set_p(i, log(exp(old_p)-delta)) ;
4645 set_p(i, log(exp(old_p)+delta)) ;
4648 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4655 SG_DEBUG(
"dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
4661 set_q(i, log(exp(old_q)-delta)) ;
4665 set_q(i, log(exp(old_q)+delta)) ;
4669 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4676 SG_DEBUG(
"dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
4684 bool CHMM::check_path_derivatives()
4695 for (int32_t j=0; j<
N; j++)
4699 set_a(i,j, log(exp(old_a)-delta)) ;
4703 set_a(i,j, log(exp(old_a)+delta)) ;
4707 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4713 SG_DEBUG(
"da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
4718 for (int32_t j=0; j<
M; j++)
4722 set_b(i,j, log(exp(old_b)-delta)) ;
4726 set_b(i,j, log(exp(old_b)+delta)) ;
4730 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4736 SG_DEBUG(
"db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
4744 set_p(i, log(exp(old_p)-delta)) ;
4748 set_p(i, log(exp(old_p)+delta)) ;
4751 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4758 SG_DEBUG(
"dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
4764 set_q(i, log(exp(old_q)-delta)) ;
4768 set_q(i, log(exp(old_q)+delta)) ;
4772 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4779 SG_DEBUG(
"dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
4784 #endif // USE_HMMDEBUG
4825 const int32_t num_states=app_model->
get_N();
4828 SG_DEBUG(
"cur N:%d M:%d\n", N, M);
4839 for (i=0; i<N+num_states; i++)
4844 for (j=0; j<N+num_states; j++)
4861 n_a[(N+num_states)*j+i]=
get_a(i,j);
4865 n_b[M*i+j]=
get_b(i,j);
4870 for (i=0; i<app_model->
get_N(); i++)
4872 n_q[i+
N]=app_model->
get_q(i);
4874 for (j=0; j<app_model->
get_N(); j++)
4875 n_a[(N+num_states)*(j+
N)+(i+N)]=app_model->
get_a(i,j);
4876 for (j=0; j<app_model->
get_M(); j++)
4877 n_b[M*(i+N)+j]=app_model->
get_b(i,j);
4884 for (j=N; j<N+num_states; j++)
4904 SG_WARNING(
"not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
4909 SG_ERROR(
"number of observations is different for append model, doing nothing!\n");
4917 const int32_t num_states=app_model->
get_N()+2;
4929 for (i=0; i<N+num_states; i++)
4934 for (j=0; j<N+num_states; j++)
4951 n_a[(N+num_states)*j+i]=
get_a(i,j);
4955 n_b[M*i+j]=
get_b(i,j);
4960 for (i=0; i<app_model->
get_N(); i++)
4962 n_q[i+N+2]=app_model->
get_q(i);
4964 for (j=0; j<app_model->
get_N(); j++)
4965 n_a[(N+num_states)*(j+N+2)+(i+N+2)]=app_model->
get_a(i,j);
4966 for (j=0; j<app_model->
get_M(); j++)
4967 n_b[M*(i+N+2)+j]=app_model->
get_b(i,j);
4975 n_b[M*N+i]=cur_out[i];
4976 n_b[M*(N+1)+i]=app_out[i];
4980 for (i=0; i<N+num_states; i++)
4984 n_a[(N+num_states)*i + N]=0;
4989 n_a[(N+num_states)*N+i]=
get_q(i);
4994 n_a[(N+num_states)*i+(N+1)]=app_model->
get_p(i-(N+2));
5013 SG_WARNING(
"not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
5042 n_a[(N+num_states)*j+i]=
get_a(i,j);
5045 n_b[M*i+j]=
get_b(i,j);
5048 for (i=N; i<N+num_states; i++)
5056 for (j=0; j<N+num_states; j++)
5084 for (int32_t i=0; i<
N; i++)
5088 if (exp(
get_p(i)) < value)
5091 if (exp(
get_q(i)) < value)
5096 if (exp(
get_a(i,j)) < value)
5102 if (exp(
get_b(i,j)) < value)
5115 int32_t* hist=
SG_MALLOC(int32_t, histsize);
5121 for (i=0; i<histsize; i++)
5124 for (i=0; i<
get_N(); i++)
5136 startendhist[(
get_N()-len)]++;
5145 for (i=0; i<
get_N()-1; i++)
5148 for (i=0; i<
get_N(); i++)
5151 for (i=0;i<
get_N();i++)
5153 for (int32_t j=0; j<
get_N(); j++)
5172 hist[i*
get_M() + *obs++]++;
5174 startendhist[len-1]++;
5180 for (i=1; i<
get_N(); i++)
5183 for (i=0; i<
get_N(); i++)
5188 for (i=0;i<
get_N();i++)
5190 total-= startendhist[i] ;
5192 for (int32_t j=0; j<
get_N(); j++)
5203 for (i=0;i<
get_N();i++)
5205 for (int32_t j=0; j<
get_M(); j++)
5239 #ifdef USE_HMMPARALLEL_STRUCTURES
5263 #endif //USE_HMMPARALLEL_STRUCTURES
5295 #ifdef USE_HMMPARALLEL_STRUCTURES
5319 #endif //USE_HMMPARALLEL_STRUCTURES
5328 #ifdef USE_HMMPARALLEL_STRUCTURES
5341 #endif //USE_HMMPARALLEL_STRUCTURES
5348 #ifdef USE_HMMPARALLEL_STRUCTURES
5349 SG_INFO(
"allocating mem for path-table of size %.2f Megabytes (%d*%d) each:\n", ((
float32_t)max_T)*N*
sizeof(
T_STATES)/(1024*1024), max_T, N);
5353 SG_DEBUG(
"path_table[%i] successfully allocated\n",i) ;
5355 SG_ERROR(
"failed allocating memory for path_table[%i].\n",i) ;
5358 #else // no USE_HMMPARALLEL_STRUCTURES
5359 SG_INFO(
"allocating mem of size %.2f Megabytes (%d*%d) for path-table ....", ((
float32_t)max_T)*N*
sizeof(
T_STATES)/(1024*1024), max_T, N);
5366 #endif // USE_HMMPARALLEL_STRUCTURES
5370 #ifdef USE_HMMPARALLEL_STRUCTURES
5374 SG_DEBUG(
"alpha_cache[%i].table successfully allocated\n",i) ;
5376 SG_ERROR(
"allocation of alpha_cache[%i].table failed\n",i) ;
5379 SG_DEBUG(
"beta_cache[%i].table successfully allocated\n",i) ;
5381 SG_ERROR(
"allocation of beta_cache[%i].table failed\n",i) ;
5383 #else // USE_HMMPARALLEL_STRUCTURES
5385 SG_DEBUG(
"alpha_cache.table successfully allocated\n") ;
5387 SG_ERROR(
"allocation of alpha_cache.table failed\n") ;
5390 SG_DEBUG(
"beta_cache.table successfully allocated\n") ;
5392 SG_ERROR(
"allocation of beta_cache.table failed\n") ;
5394 #endif // USE_HMMPARALLEL_STRUCTURES
5395 #else // USE_HMMCACHE
5396 #ifdef USE_HMMPARALLEL_STRUCTURES
5402 #else //USE_HMMPARALLEL_STRUCTURES
5405 #endif //USE_HMMPARALLEL_STRUCTURES
5406 #endif //USE_HMMCACHE
5417 ( sequence_number<0 || sequence_number < p_observations->get_num_vectors()))
5419 int32_t min_sequence=sequence_number;
5420 int32_t max_sequence=sequence_number;
5422 if (sequence_number<0)
5426 SG_INFO(
"numseq: %d\n", max_sequence);
5429 SG_INFO(
"min_sequence: %d max_sequence: %d\n", min_sequence, max_sequence);
5430 for (sequence_number=min_sequence; sequence_number<max_sequence; sequence_number++)
5432 int32_t sequence_length=0;
5436 int32_t histsize=
get_M();
5437 int64_t* hist=
SG_MALLOC(int64_t, histsize);
5440 for (i=0; i<sequence_length-window_width; i++)
5442 for (j=0; j<histsize; j++)
5445 uint16_t* ptr=&obs[i];
5446 for (j=0; j<window_width; j++)
5452 for (j=0; j<
get_M(); j++)
5457 perm_entropy+=p*log(p);
5476 else if (num_param<2*N)
5478 else if (num_param<N*(N+2))
5480 int32_t k=num_param-2*
N;
5485 else if (num_param<N*(N+2+M))
5487 int32_t k=num_param-N*(N+2);
5500 return get_p(num_param);
5501 else if (num_param<2*N)
5502 return get_q(num_param-N);
5503 else if (num_param<N*(N+2))
5505 else if (num_param<N*(N+2+M))
5567 if (prob_max<prob_train)
5568 prob_max=prob_train;
5571 if (estimate ==
this)
int32_t get_learn_p(int32_t offset) const
get entry out of learn_p vector
int32_t * learn_p
start states to be learned
void set_observation_nocache(CStringFeatures< uint16_t > *obs)
float64_t * transition_matrix_a
transition matrix
bool mod_prob_updated
true if model probability is up to date
void chop(float64_t value)
set any model parameter with probability smaller than value to ZERO
float64_t backward_comp(int32_t time, int32_t state, int32_t dimension)
int32_t N
number of states
void convert_to_log()
convert model to log probabilities
float64_t backward(int32_t time, int32_t state, int32_t dimension)
inline proxies for backward pass
float64_t get_const_p_val(int32_t offset) const
get value out of const_p_val vector
static const int32_t GOTp
bool save_likelihood(FILE *file)
void close_bracket(FILE *file)
expect closing bracket
float64_t * const_a_val
values for transitions that have constant probability
Model()
Constructor - initializes all variables/structures.
virtual EFeatureType get_feature_type()=0
void set_const_p(int32_t offset, int32_t value)
set value in const_p vector
bool get_numbuffer(FILE *file, char *buffer, int32_t length)
put a sequence of numbers into the buffer
int32_t get_M() const
access function for number of observations M
float64_t pat_prob
probability of best path
float64_t get_const_b_val(int32_t line) const
get value out of const_b_val vector
int32_t get_num_threads() const
bool save_model(FILE *file)
void set_observations(CStringFeatures< uint16_t > *obs, CHMM *hmm=NULL)
void set_const_p_val(int32_t offset, float64_t value)
set value in const_p_val vector
static const float64_t INFTY
infinity
T_STATES * states_per_observation_psi
backtracking table for viterbi can be terrible HUGE O(T*N)
float64_t forward(int32_t time, int32_t state, int32_t dimension)
inline proxies for forward pass
float64_t * const_q_val
values for end states that have constant probability
bool all_path_prob_updated
true if path probability is up to date
#define FLOATWRITE(file, value)
float64_t epsilon
convergence criterion epsilon
void estimate_model_baum_welch_defined(CHMM *train)
static const int32_t GOTconst_p
viterbi only for defined transitions/observations
int32_t get_const_p(int32_t offset) const
get entry out of const_p vector
baum welch only for defined transitions/observations
bool linear_train(bool right_align=false)
estimates linear model from observations.
bool save_model_bin(FILE *file)
int32_t * learn_b
emissions to be learned
float64_t * transition_matrix_A
matrix of absolute counts of transitions
ST get_masked_symbols(ST symbol, uint8_t mask)
static const int32_t GOTlearn_a
float64_t path_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
computes d log p(lambda,best_path)/d b_ij
T_ALPHA_BETA beta_cache
cache for backward variables can be terrible HUGE O(T*N)
float64_t get_b(T_STATES line_, uint16_t column) const
bool path_prob_updated
true if path probability is up to date
bool save_likelihood_bin(FILE *file)
bool baum_welch_viterbi_train(BaumWelchViterbiType type)
static const int32_t GOTO
float64_t forward_comp_old(int32_t time, int32_t state, int32_t dimension)
float64_t * observation_matrix_B
matrix of absolute counts of observations within each state
float64_t get_A(T_STATES line_, T_STATES column) const
float64_t T_ALPHA_BETA_TABLE
type for alpha/beta caching table
Base class Distribution from which all methods implementing a distribution are derived.
bool check_model_derivatives()
numerically check whether derivates were calculated right
virtual int32_t get_num_vectors() const
static const int32_t GOTlearn_p
void open_bracket(FILE *file)
expect open bracket.
int32_t get_learn_q(int32_t offset) const
get entry out of learn_q vector
float64_t get_pseudo() const
returns current pseudo value
float64_t get_B(T_STATES line_, uint16_t column) const
float64_t * const_b_val
values for emissions that have constant probability
static const int32_t GOTlearn_q
virtual bool train(CFeatures *data=NULL)
bool save_model_derivatives(FILE *file)
static const float64_t ALMOST_NEG_INFTY
almost neg (log) infinity
virtual float64_t get_log_model_parameter(int32_t num_param)
void estimate_model_baum_welch_old(CHMM *train)
int32_t path_prob_dimension
dimension for which path_prob was calculated
virtual ~Model()
Destructor - cleans up.
virtual int32_t get_vector_length(int32_t vec_num)
int32_t * learn_q
end states to be learned
bool load_model(FILE *file)
void estimate_model_baum_welch_trans(CHMM *train)
float64_t model_probability(int32_t dimension=-1)
inline proxy for model probability.
int32_t path_deriv_dimension
dimension for which path_deriv was calculated
int32_t * const_a
transitions that have constant probability
static const int32_t GOTb
float64_t mod_prob
probability of model
virtual EFeatureClass get_feature_class()=0
bool check_model_derivatives_combined()
static int is_finite(double f)
checks whether a float is finite
float64_t model_derivative_q(T_STATES i, int32_t dimension)
float64_t * const_p_val
values for start states that have constant probability
T_STATES get_psi(int32_t time, T_STATES state, int32_t dimension) const
floatmax_t get_original_num_symbols()
void set_p(T_STATES offset, float64_t value)
bool permutation_entropy(int32_t window_width, int32_t sequence_number)
compute permutation entropy
static const int32_t GOTconst_a
float64_t * end_state_distribution_q
distribution of end-states
void add_states(int32_t num_states, float64_t default_val=0)
float64_t PSEUDO
define pseudocounts against overfitting
int32_t get_num_symbols() const
float64_t all_pat_prob
probability of best path
float64_t path_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
computes d log p(lambda,best_path)/d a_ij
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
void set_const_q_val(int32_t offset, float64_t value)
set value in const_q_val vector
bool save_model_derivatives_bin(FILE *file)
float64_t get_q(T_STATES offset) const
int32_t * const_q
end states that have constant probability
CStringFeatures< uint16_t > * p_observations
observation matrix
bool save_path_derivatives(FILE *file)
void set_learn_a(int32_t offset, int32_t value)
set value in learn_a matrix
void set_learn_q(int32_t offset, int32_t value)
set value in learn_q vector
void set_A(T_STATES line_, T_STATES column, float64_t value)
SGVector< ST > get_feature_vector(int32_t num)
void set_q(T_STATES offset, float64_t value)
floatmax_t get_num_symbols()
void set_B(T_STATES line_, uint16_t column, float64_t value)
int32_t get_const_q(int32_t offset) const
get entry out of const_q vector
int32_t iterations
convergence criterion iterations
float64_t get_const_q_val(int32_t offset) const
get value out of const_q_val vector
static const int32_t GOTq
static const int32_t GOTlearn_b
float64_t * observation_matrix_b
distribution of observations within each state
T_STATES * get_path(int32_t dim, float64_t &prob)
float64_t model_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
computes log dp(lambda)/d a_ij.
static bool cancel_computations()
float64_t best_path(int32_t dimension)
int32_t get_const_a(int32_t line, int32_t column) const
get entry out of const_a matrix
float64_t model_probability_comp()
void set_const_b_val(int32_t offset, float64_t value)
set value in const_b_val vector
bool path_deriv_updated
true if path derivative is up to date
virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example)
bool append_model(CHMM *append_model, float64_t *cur_out, float64_t *app_out)
void estimate_model_viterbi(CHMM *train)
int32_t M
number of observation symbols eg. ACGT -> 0123
void sort_learn_a()
sorts learn_a matrix
void set_learn_p(int32_t offset, int32_t value)
set value in learn_p vector
bool load_definitions(FILE *file, bool verbose, bool initialize=true)
void set_learn_b(int32_t offset, int32_t value)
set value in learn_b matrix
float64_t * initial_state_distribution_p
initial distribution of states
bool save_path_derivatives_bin(FILE *file)
baum welch only for specified transitions
float64_t get_a(T_STATES line_, T_STATES column) const
void clear_model_defined()
initializes only parameters in learn_x with log(PSEUDO)
float64_t path_derivative_p(T_STATES i, int32_t dimension)
computes d log p(lambda,best_path)/d p_i
float64_t forward_comp(int32_t time, int32_t state, int32_t dimension)
float64_t model_derivative_p(T_STATES i, int32_t dimension)
float64_t get_p(T_STATES offset) const
int32_t * const_b
emissions that have constant probability
bool comma_or_space(FILE *file)
expect comma or space.
void estimate_model_viterbi_defined(CHMM *train)
void sort_learn_b()
sorts learn_b matrix
The class Features is the base class of all feature objects.
void init_model_defined()
static const int32_t GOTconst_b
void free_state_dependend_arrays()
free memory that depends on N
virtual int32_t get_max_vector_length()
void set_const_a_val(int32_t offset, float64_t value)
set value in const_a_val vector
bool alloc_state_dependend_arrays()
allocates memory that depends on N
void set_const_a(int32_t offset, int32_t value)
set value in const_a matrix
void set_b(T_STATES line_, uint16_t column, float64_t value)
float64_t path_derivative_q(T_STATES i, int32_t dimension)
computes d log p(lambda,best_path)/d q_i
static void swap(T &a, T &b)
swap e.g. floats a and b
int32_t get_learn_a(int32_t line, int32_t column) const
get entry out of learn_a matrix
CAlphabet * get_alphabet()
float64_t backward_comp_old(int32_t time, int32_t state, int32_t dimension)
static const int32_t GOTM
virtual ~CHMM()
Destructor - Cleanup.
void copy_model(CHMM *l)
copies the the modelparameters from l
void clear_model()
initializes model with log(PSEUDO)
int32_t get_const_b(int32_t line, int32_t column) const
get entry out of const_b matrix
void output_model_defined(bool verbose=false)
performs output_model only for the defined transitions etc
void output_model(bool verbose=false)
static const int32_t GOTconst_q
void set_psi(int32_t time, T_STATES state, T_STATES value, int32_t dimension)
void set_a(T_STATES line_, T_STATES column, float64_t value)
bool initialize(Model *model, float64_t PSEUDO, FILE *model_file=NULL)
void normalize(bool keep_dead_states=false)
normalize the model to satisfy stochasticity
static float64_t logarithmic_sum(float64_t p, float64_t q)
static const int32_t GOTN
virtual ST get_feature(int32_t vec_num, int32_t feat_num)
T_STATES * path
best path (=state sequence) through model
#define SG_MALLOC(type, len)
int32_t * const_p
start states that have constant probability
void init_model_random()
init model with random values
void set_const_b(int32_t offset, int32_t value)
set value in const_b matrix
T_STATES get_N() const
access function for number of states N
void set_const_q(int32_t offset, int32_t value)
set value in const_q vector
int32_t get_learn_b(int32_t line, int32_t column) const
get entry out of learn_b matrix
void estimate_model_baum_welch(CHMM *train)
int32_t * learn_a
transitions to be learned
void error(int32_t p_line, const char *str)
parse error messages
bool save_path(FILE *file)
float64_t model_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
computes log dp(lambda)/d b_ij.
float64_t get_const_a_val(int32_t line) const
get value out of const_a_val vector
T_ALPHA_BETA alpha_cache
cache for forward variables can be terrible HUGE O(T*N)
static const int32_t GOTa