16 #include <shogun/lib/config.h> 42 false,
false,
false,
false,
false,
false,
false,
false} ;
47 :
CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
48 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
49 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
50 m_end_state_distribution_q_deriv(1),
55 m_word_degree(word_degree_default, m_num_degrees, true, true),
56 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
57 m_cum_num_words_array(m_cum_num_words.get_array()),
58 m_num_words(num_words_default, m_num_degrees, true, true),
59 m_num_words_array(m_num_words.get_array()),
60 m_mod_words(mod_words_default, m_num_svms, 2, true, true),
61 m_mod_words_array(m_mod_words.get_array()),
62 m_sign_words(sign_words_default, m_num_svms, true, true),
63 m_sign_words_array(m_sign_words.get_array()),
64 m_string_words(string_words_default, m_num_svms, true, true),
65 m_string_words_array(m_string_words.get_array()),
67 m_num_unique_words(m_num_degrees),
68 m_svm_arrays_clean(true),
70 m_max_a_id(0), m_observation_matrix(1,1,1),
75 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2),
88 m_plif_matrices(NULL),
92 m_num_intron_plifs(0),
94 m_raw_intensities(NULL),
96 m_num_probes_cum(NULL),
97 m_num_lin_feat_plifs_cum(NULL),
100 m_long_transitions(true),
101 m_long_transition_threshold(1000)
103 trans_list_forward = NULL ;
104 trans_list_forward_cnt = NULL ;
105 trans_list_forward_val = NULL ;
106 trans_list_forward_id = NULL ;
109 mem_initialized = true ;
126 if (trans_list_forward_cnt)
127 SG_FREE(trans_list_forward_cnt);
128 if (trans_list_forward)
130 for (int32_t i=0; i<trans_list_len; i++)
132 if (trans_list_forward[i])
133 SG_FREE(trans_list_forward[i]);
135 SG_FREE(trans_list_forward);
137 if (trans_list_forward_val)
139 for (int32_t i=0; i<trans_list_len; i++)
141 if (trans_list_forward_val[i])
142 SG_FREE(trans_list_forward_val[i]);
144 SG_FREE(trans_list_forward_val);
146 if (trans_list_forward_id)
148 for (int32_t i=0; i<trans_list_len; i++)
150 if (trans_list_forward_id[i])
151 SG_FREE(trans_list_forward_id[i]);
153 SG_FREE(trans_list_forward_id);
185 for (int32_t i=0; i<length-2; i++)
221 int32_t* probe_pos,
float64_t* intensities,
const int32_t num_probes)
230 if (m_num_raw_data==1){
231 sg_memcpy(tmp_probe_pos, probe_pos, num_probes*
sizeof(int32_t));
232 sg_memcpy(tmp_raw_intensities, intensities, num_probes*
sizeof(
float64_t));
237 sg_memcpy(tmp_probe_pos+
m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*
sizeof(int32_t));
255 for (
int s=0; s<p_num_svms; s++)
271 memset(tmp, 0, (dim1+num_new_feat)*dim2*
sizeof(
float64_t)) ;
274 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
294 CPlif** PEN,
const int32_t* tiling_plif_ids,
const int32_t num_tiling_plifs)
301 int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs);
302 for (int32_t i=0; i<num_tiling_plifs; i++)
305 CPlif * plif = PEN[tiling_plif_ids[i]];
317 for (int32_t pos_idx=0;pos_idx<
m_seq_len;pos_idx++)
321 for (int32_t i=0; i<num_tiling_plifs; i++)
324 CPlif * plif = PEN[tiling_plif_ids[i]];
334 for (int32_t i=0; i<num_tiling_plifs; i++)
338 SG_FREE(tiling_plif);
339 SG_FREE(tiling_rows);
354 m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len);
355 for (int32_t i=0; i<genestr_len; i++)
380 int32_t from_pos =
m_pos[p];
381 int32_t to_pos =
m_pos[p+1];
389 my_svm_values_unnormalized[s]=0.0;
391 for (int32_t i=from_pos; i<to_pos; i++)
409 if (prev<-1e20 || prev>1e20)
411 SG_ERROR(
"initialization missing (%i, %i, %f)\n", s, p, prev)
416 SG_FREE(my_svm_values_unnormalized);
426 SG_ERROR(
"length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.
vlen,
m_N)
434 SG_ERROR(
"length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.
vlen,
m_N)
452 for (int32_t i=0; i<
m_N; i++)
454 for (int32_t j=0; j<
m_N; j++)
466 if (!((num_cols==3) || (num_cols==4)))
467 SG_ERROR(
"!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols)
469 SG_FREE(trans_list_forward);
470 SG_FREE(trans_list_forward_cnt);
471 SG_FREE(trans_list_forward_val);
472 SG_FREE(trans_list_forward_id);
474 trans_list_forward = NULL ;
475 trans_list_forward_cnt = NULL ;
476 trans_list_forward_val = NULL ;
482 mem_initialized = true ;
484 trans_list_forward_cnt=NULL ;
485 trans_list_len =
m_N ;
489 trans_list_forward_id = SG_MALLOC(int32_t*,
m_N);
492 for (int32_t j=0; j<
m_N; j++)
494 int32_t old_start_idx=start_idx;
496 while (start_idx<num_trans && a_trans.
matrix[start_idx+num_trans]==j)
500 if (start_idx>1 && start_idx<num_trans)
504 if (start_idx>1 && start_idx<num_trans)
507 int32_t len=start_idx-old_start_idx;
510 trans_list_forward_cnt[j] = 0 ;
514 trans_list_forward[j] = SG_MALLOC(
T_STATES, len);
515 trans_list_forward_val[j] = SG_MALLOC(
float64_t, len);
516 trans_list_forward_id[j] = SG_MALLOC(int32_t, len);
520 trans_list_forward[j] = NULL;
521 trans_list_forward_val[j] = NULL;
522 trans_list_forward_id[j] = NULL;
526 for (int32_t i=0; i<num_trans; i++)
528 int32_t from_state = (int32_t)a_trans.
matrix[i] ;
529 int32_t to_state = (int32_t)a_trans.
matrix[i+num_trans] ;
533 id = (int32_t)a_trans.
matrix[i+num_trans*3] ;
536 ASSERT(to_state>=0 && to_state<m_N)
537 ASSERT(from_state>=0 && from_state<m_N)
539 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
540 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
541 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
542 trans_list_forward_cnt[to_state]++ ;
549 for (int32_t i=0; i<
m_N; i++)
550 for (int32_t j=0; j<
m_N; j++)
615 SG_WARNING(
"SVM array: word_degree.get_dim1()!=m_num_degrees")
617 SG_WARNING(
"SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1")
619 SG_WARNING(
"SVM array: m_num_words.get_dim1()==m_num_degrees")
623 SG_WARNING(
"SVM array: m_num_unique_words.get_dim1()!=m_num_degrees")
625 SG_WARNING(
"SVM array: m_mod_words.get_dim1()!=num_svms")
627 SG_WARNING(
"SVM array: m_mod_words.get_dim2()!=2")
629 SG_WARNING(
"SVM array: m_sign_words.get_dim1()!=num_svms")
631 SG_WARNING(
"SVM array: m_string_words.get_dim1()!=num_svms")
641 SG_ERROR(
"Expected 3-dimensional Matrix\n")
643 int32_t N=seq.
dims[0];
644 int32_t cand_pos=seq.
dims[1];
645 int32_t max_num_features=seq.
dims[2];
670 if (seg_path.
matrix!=NULL)
672 int32_t *segment_ids = SG_MALLOC(int32_t,
m_seq_len);
676 segment_ids[i] = (int32_t)seg_path.
matrix[2*i] ;
677 segment_mask[i] = seg_path.
matrix[2*i+1] ;
680 SG_FREE(segment_ids);
681 SG_FREE(segment_mask);
685 int32_t *izeros = SG_MALLOC(int32_t,
m_seq_len);
714 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
715 SG_ERROR(
"Sparse features must either both be NULL or both NON-NULL\n")
763 SG_ERROR(
"m_dict_weights array does not match num_svms=%i!=%i\n",
784 SG_ERROR(
"segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n)
796 int32_t* segment_ids,
float64_t* segment_mask, int32_t m)
802 for (int32_t i=1;i<m;i++)
847 int32_t sz =
sizeof(
float64_t)*(*seq_len);
849 *scores = SG_MALLOC(
float64_t, *seq_len);
861 int32_t sz =
sizeof(
float64_t)*(*seq_len);
863 *losses = SG_MALLOC(
float64_t, *seq_len);
872 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
875 #ifdef DYNPROG_TIMING_DETAIL 884 int32_t orf_target = orf_to-orf_from ;
885 if (orf_target<0) orf_target+=3 ;
895 #ifdef DYNPROG_TIMING_DETAIL 897 orf_time += MyTime.time_diff_sec() ;
902 for (; pos>=start; pos-=3)
905 #ifdef DYNPROG_TIMING_DETAIL 907 orf_time += MyTime.time_diff_sec() ;
915 #ifdef DYNPROG_TIMING_DETAIL 917 orf_time += MyTime.time_diff_sec() ;
923 int16_t nbest,
bool with_loss,
bool with_multiple_sequences)
938 for (int32_t i=0; i<nbest; i++)
956 #ifdef DYNPROG_TIMING 957 segment_init_time = 0.0 ;
958 segment_pos_time = 0.0 ;
959 segment_extend_time = 0.0 ;
960 segment_clean_time = 0.0 ;
962 svm_init_time = 0.0 ;
964 svm_clean_time = 0.0 ;
965 inner_loop_time = 0.0 ;
966 content_svm_values_time = 0.0 ;
967 content_plifs_time = 0.0 ;
968 inner_loop_max_time = 0.0 ;
969 long_transition_time = 0.0 ;
988 int32_t max_look_back = 1000 ;
989 bool use_svm = false ;
1003 #ifdef DYNPROG_DEBUG 1007 SG_PRINT(
"m_num_lin_feat_plifs_cum: ")
1023 if (seq_array!=NULL)
1034 SG_PRINT(
"using sparse seq_array\n")
1038 ASSERT(max_num_signals==2)
1041 for (int32_t i=0; i<
m_N; i++)
1045 for (int32_t i=0; i<
m_N; i++)
1047 for (int32_t k=0; k<max_num_signals; k++)
1049 if ((PEN_state_signals.
element(i,k)==NULL) && (k==0))
1052 if (seq_input!=NULL)
1063 if (PEN_state_signals.
element(i,k)!=NULL)
1065 if (seq_input!=NULL)
1106 #ifdef DYNPROG_DEBUG 1112 #ifdef DYNPROG_DEBUG 1124 SG_ERROR(
"Long transitions are not supported for nbest!=1")
1125 long_transitions = false ;
1128 #ifdef DYNPROG_DEBUG 1129 long_transition_content_scores_pen.
set_const(0) ;
1130 long_transition_content_scores_elem.
set_const(0) ;
1131 long_transition_content_scores_prev.
set_const(0) ;
1134 long_transition_content_scores_loss.
set_const(0) ;
1135 long_transition_content_start.
set_const(0) ;
1136 long_transition_content_start_position.
set_const(0) ;
1137 #ifdef DYNPROG_DEBUG 1138 long_transition_content_end_position.
set_const(0) ;
1152 for (int32_t i=0; i<
m_N; i++)
1153 for (int32_t j=0; j<
m_N; j++)
1159 for (int32_t j=0; j<
m_N; j++)
1162 const T_STATES num_elem = trans_list_forward_cnt[j] ;
1163 const T_STATES *elem_list = trans_list_forward[j] ;
1165 for (int32_t i=0; i<num_elem; i++)
1172 if (long_transitions)
1202 if (long_transitions)
1208 int32_t num_long_transitions = 0 ;
1209 for (int32_t i=0; i<
m_N; i++)
1210 for (int32_t j=0; j<
m_N; j++)
1213 num_long_transitions++ ;
1216 if (long_transitions)
1228 SG_DEBUG(
"Using %i long transitions\n", num_long_transitions)
1235 SG_DEBUG(
"maxlook: %d m_N: %d nbest: %d \n", max_look_back,
m_N, nbest)
1236 const int32_t look_back_buflen = (max_look_back*
m_N+1)*nbest ;
1237 SG_DEBUG(
"look_back_buflen=%i\n", look_back_buflen)
1277 memset(fixedtempvv, 0, look_back_buflen*
sizeof(
float64_t)) ;
1278 int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen);
1279 memset(fixedtempii, 0, look_back_buflen*
sizeof(int32_t)) ;
1300 #ifdef DYNPROG_DEBUG 1313 PEN.display_size() ;
1314 PEN_state_signals.display_size() ;
1327 #ifdef USE_TMP_ARRAYCLASS 1328 fixedtempvv.display_size() ;
1329 fixedtempii.display_size() ;
1340 #endif //DYNPROG_DEBUG 1365 for (int16_t k=1; k<nbest; k++)
1367 int32_t dim1, dim2, dim3 ;
1411 for (int16_t k=0; k<nbest; k++)
1413 delta.
element(delta_array, t, j, k, m_seq_len, m_N) = seq.
element(j,t) ;
1422 const T_STATES num_elem = trans_list_forward_cnt[j] ;
1423 const T_STATES *elem_list = trans_list_forward[j] ;
1424 const float64_t *elem_val = trans_list_forward_val[j] ;
1425 const int32_t *elem_id = trans_list_forward_id[j] ;
1427 int32_t fixed_list_len = 0 ;
1429 int32_t fixedtempii_ = 0 ;
1430 bool fixedtemplong = false ;
1432 for (int32_t i=0; i<num_elem; i++)
1449 int32_t look_back_ = look_back.
element(j, ii) ;
1453 if((orf_from!=-1)!=(orf_to!=-1))
1454 SG_DEBUG(
"j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
1455 ASSERT((orf_from!=-1)==(orf_to!=-1))
1457 int32_t orf_target = -1 ;
1460 orf_target=orf_to-orf_from ;
1463 ASSERT(orf_target>=0 && orf_target<3)
1466 int32_t orf_last_pos =
m_pos[t] ;
1467 #ifdef DYNPROG_TIMING 1470 int32_t num_ok_pos = 0 ;
1472 for (int32_t ts=t-1; ts>=0 &&
m_pos[t]-
m_pos[ts]<=look_back_; ts--)
1486 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
1487 ok=(!use_orf) ||
extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
1505 int32_t frame = orf_from;
1511 #ifdef DYNPROG_TIMING_DETAIL 1514 pen_val = penalty->
lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
1516 #ifdef DYNPROG_TIMING_DETAIL 1518 content_plifs_time += MyTime.time_diff_sec() ;
1522 #ifdef DYNPROG_TIMING_DETAIL 1531 val += segment_loss ;
1533 float64_t mval = -(val + delta.
element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
1535 if (mval<fixedtempvv_)
1537 fixedtempvv_ = mval ;
1538 fixedtempii_ = ii + ts*
m_N;
1539 fixed_list_len = 1 ;
1540 fixedtemplong = false ;
1545 for (int16_t diff=0; diff<nbest; diff++)
1550 val += segment_loss ;
1552 float64_t mval = -(val + delta.
element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
1558 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
1560 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
1562 fixedtempvv[fixed_list_len] = mval ;
1563 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
1568 int32_t addhere = fixed_list_len;
1569 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
1573 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
1575 fixedtempvv[jj] = fixedtempvv[jj-1];
1576 fixedtempii[jj] = fixedtempii[jj-1];
1579 fixedtempvv[addhere] = mval;
1580 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
1582 if (fixed_list_len < nbest)
1588 #ifdef DYNPROG_TIMING_DETAIL 1590 inner_loop_max_time += MyTime.time_diff_sec() ;
1594 #ifdef DYNPROG_TIMING 1596 inner_loop_time += MyTime3.time_diff_sec() ;
1599 for (int32_t i=0; i<num_elem; i++)
1616 int32_t look_back_ = look_back.
element(j, ii) ;
1621 if((orf_from!=-1)!=(orf_to!=-1))
1622 SG_DEBUG(
"j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
1623 ASSERT((orf_from!=-1)==(orf_to!=-1))
1625 int32_t orf_target = -1 ;
1628 orf_target=orf_to-orf_from ;
1631 ASSERT(orf_target>=0 && orf_target<3)
1637 #ifdef DYNPROG_TIMING 1648 #ifdef DYNPROG_TIMING 1657 int32_t start = long_transition_content_start.
get_element(ii, j) ;
1658 int32_t end_5p_part = start ;
1662 while (end_5p_part<=t &&
m_pos[end_5p_part+1]-
m_pos[start_5p_part]<=m_long_transition_threshold)
1665 ASSERT(
m_pos[end_5p_part+1]-
m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t)
1666 ASSERT(
m_pos[end_5p_part]-
m_pos[start_5p_part] <= m_long_transition_threshold)
1683 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.
element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ;
1692 mval_trans -= segment_loss_part1 ;
1703 long_transition_content_start_position.
set_element(0, ii, j) ;
1705 long_transition_content_scores_loss.
set_element(0.0, ii, j) ;
1706 #ifdef DYNPROG_DEBUG 1707 long_transition_content_scores_pen.
set_element(0.0, ii, j) ;
1708 long_transition_content_scores_elem.
set_element(0.0, ii, j) ;
1709 long_transition_content_scores_prev.
set_element(0.0, ii, j) ;
1710 long_transition_content_end_position.
set_element(0, ii, j) ;
1718 long_transition_content_scores.
set_element(score, ii, j) ;
1719 long_transition_content_scores_loss.
set_element(new_loss, ii, j) ;
1720 #ifdef DYNPROG_DEBUG 1721 long_transition_content_end_position.
set_element(end_5p_part, ii, j) ;
1725 if (-long_transition_content_scores.
get_element(ii, j) > mval_trans )
1728 long_transition_content_scores.
set_element(-mval_trans, ii, j) ;
1729 long_transition_content_start_position.
set_element(start_5p_part, ii, j) ;
1731 long_transition_content_scores_loss.
set_element(segment_loss_part1, ii, j) ;
1732 #ifdef DYNPROG_DEBUG 1733 long_transition_content_scores_pen.
set_element(pen_val*0.5, ii, j) ;
1734 long_transition_content_scores_elem.
set_element(elem_val[i], ii, j) ;
1735 long_transition_content_scores_prev.
set_element(delta.
element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ;
1739 long_transition_content_end_position.
set_element(end_5p_part, ii, j) ;
1746 long_transition_content_start.
set_element(start_5p_part, ii, j) ;
1756 while (ts>0 &&
m_pos[t]-
m_pos[ts-1] <= m_long_transition_threshold)
1765 float pen_val_3p = 0.0 ;
1768 int32_t frame = orf_from ;
1776 #ifdef DYNPROG_DEBUG 1786 #ifdef DYNPROG_DEBUG 1793 mval -= (segment_loss_total-long_transition_content_scores_loss.
get_element(ii, j)) ;
1796 #ifdef DYNPROG_DEBUG 1799 SG_PRINT(
"Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i) scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n",
1801 long_transition_content_scores.
get_element(ii, j),
1802 long_transition_content_scores_pen.
get_element(ii, j),
1803 long_transition_content_scores_prev.
get_element(ii, j),
1804 long_transition_content_scores_elem.
get_element(ii, j),
1805 long_transition_content_scores_loss.
get_element(ii, j),
1808 m_pos[long_transition_content_start.
get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.
get_element(ii,j), t) ;
1809 SG_PRINT(
"fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] )
1812 if (fabs(segment_loss_part2+long_transition_content_scores_loss.
get_element(ii, j) - segment_loss_total)>1e-3)
1814 SG_ERROR(
"LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n",
1816 long_transition_content_scores_loss.
get_element(ii, j), segment_loss_part1,
m_pos[long_transition_content_start_position.
get_element(ii,j)], m_pos[long_transition_content_end_position.
get_element(ii,j)],
1817 segment_loss_part2, m_pos[long_transition_content_end_position.
get_element(ii,j)], m_pos[t],
1818 segment_loss_part2+long_transition_content_scores_loss.
get_element(ii, j),
1819 segment_loss_part2+long_transition_content_scores_loss.
get_element(ii, j) - segment_loss_total) ;
1829 if (mval < fixedtempvv_)
1832 int32_t fromtjk = fixedtempii_ ;
1838 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 ||
m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=
m_pos[long_transition_content_start_position.
get_element(ii, j)] || fixedtemplong)
1840 fixedtempvv_ = mval ;
1841 fixedtempii_ = ii + m_N*long_transition_content_start_position.
get_element(ii, j) ;
1842 fixed_list_len = 1 ;
1843 fixedtemplong = true ;
1848 #ifdef DYNPROG_TIMING 1850 long_transition_time += MyTime3.time_diff_sec() ;
1854 int32_t numEnt = fixed_list_len;
1859 for (int16_t k=0; k<nbest; k++)
1865 minusscore = fixedtempvv_ ;
1866 fromtjk = fixedtempii_ ;
1870 minusscore = fixedtempvv[k];
1871 fromtjk = fixedtempii[k];
1874 delta.
element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.
element(j,t);
1877 ktable.
element(t,j,k) = (fromtjk%(m_N*nbest)-psi.
element(t,j,k))/m_N ;
1878 ptable.
element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
1893 int32_t list_len = 0 ;
1894 for (int16_t diff=0; diff<nbest; diff++)
1898 oldtempvv[list_len] = -(delta.
element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+
get_q(i)) ;
1899 oldtempii[list_len] = i + diff*
m_N ;
1906 for (int16_t k=0; k<nbest; k++)
1908 delta_end.
element(k) = -oldtempvv[k] ;
1918 for (int16_t k=0; k<nbest; k++)
1920 prob_nbest[k]= delta_end.
element(k) ;
1923 state_seq[i] = path_ends.
element(k) ;
1927 pos_seq[i] = m_seq_len-1 ;
1929 while (pos_seq[i]>0)
1933 state_seq[i+1] = psi.
element(pos_seq[i], state_seq[i], q);
1934 pos_seq[i+1] = ptable.
element(pos_seq[i], state_seq[i], q) ;
1936 q = ktable.
element(pos_seq[i], state_seq[i], q) ;
1940 int32_t num_states = i+1 ;
1941 for (i=0; i<num_states;i++)
1943 my_state_seq[i+k*
m_seq_len] = state_seq[num_states-i-1] ;
1944 my_pos_seq[i+k*
m_seq_len] = pos_seq[num_states-i-1] ;
1946 if (num_states<m_seq_len)
1948 my_state_seq[num_states+k*
m_seq_len]=-1 ;
1958 #ifdef DYNPROG_TIMING 1962 SG_PRINT(
"Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec())
1965 SG_FREE(fixedtempvv);
1966 SG_FREE(fixedtempii);
1971 int32_t *my_state_seq, int32_t *my_pos_seq,
1972 int32_t my_seq_len,
const float64_t *seq_array, int32_t max_num_signals)
1996 bool use_svm = false ;
2005 for (int32_t i=0; i<
m_N; i++)
2006 for (int32_t j=0; j<
m_N; j++)
2016 for (int32_t i=0; i<
m_N; i++)
2017 for (int32_t j=0; j<max_num_signals; j++)
2030 for (int32_t i=0; i<
m_N; i++)
2034 for (int32_t j=0; j<
m_N; j++)
2040 for (int32_t i=0; i<my_seq_len; i++)
2059 svm_value_part1[s]=0 ;
2060 svm_value_part2[s]=0 ;
2068 ASSERT(my_state_seq[0]>=0)
2072 ASSERT(my_state_seq[my_seq_len-1]>=0)
2077 total_score += my_scores[0] + my_scores[my_seq_len-1] ;
2080 SG_DEBUG(
"m_seq_len=%i\n", my_seq_len)
2081 for (int32_t i=0; i<my_seq_len-1; i++)
2083 if (my_state_seq[i+1]==-1)
2085 int32_t from_state = my_state_seq[i] ;
2086 int32_t to_state = my_state_seq[i+1] ;
2087 int32_t from_pos = my_pos_seq[i] ;
2088 int32_t to_pos = my_pos_seq[i+1] ;
2093 #ifdef DYNPROG_DEBUG 2101 SG_PRINT(
"loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3)
2104 SG_PRINT(
"%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
2108 SG_DEBUG(
"%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
2114 #ifdef DYNPROG_DEBUG 2115 SG_DEBUG(
"%i. scores[i]=%f\n", i, my_scores[i])
2122 bool is_long_transition = false ;
2126 is_long_transition = true ;
2128 is_long_transition =
false ;
2131 int32_t from_pos_thresh = from_pos ;
2132 int32_t to_pos_thresh = to_pos ;
2136 if (is_long_transition)
2141 ASSERT(from_pos_thresh<to_pos)
2148 #ifdef DYNPROG_DEBUG 2149 SG_PRINT(
"part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ",
m_pos[from_pos],
m_pos[from_pos_thresh],
m_pos[from_pos_thresh+1])
2151 SG_PRINT(
"%1.4f ", svm_value_part1[s])
2163 #ifdef DYNPROG_DEBUG 2164 SG_PRINT(
"part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ",
m_pos[to_pos],
m_pos[to_pos_thresh],
m_pos[to_pos_thresh+1])
2166 SG_PRINT(
"%1.4f ", svm_value_part2[s])
2178 int32_t num_current_svms=0;
2179 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
2180 SG_PRINT(
"penalties(%i, %i), frame:%i ", from_state, to_state, frame)
2181 ((
CPlifBase*) PEN.
element(to_state, from_state))->get_used_svms(&num_current_svms, svm_ids);
2186 #ifdef DYNPROG_DEBUG 2195 if (PEN.
element(to_state, from_state)!=NULL)
2198 if (is_long_transition)
2202 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
2208 SG_PRINT(
"is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n",
2209 is_long_transition,
m_pos[from_pos], from_state,
m_pos[to_pos], to_state, nscore) ;
2211 my_scores[i] += nscore ;
2220 #ifdef DYNPROG_DEBUG 2223 if (is_long_transition)
2225 #ifdef DYNPROG_DEBUG 2228 for (
int kk=0; kk<i; kk++)
2229 sum_score += my_scores[i] ;
2231 SG_PRINT(
"is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n",
2232 is_long_transition,
m_pos[from_pos], from_state,
m_pos[to_pos], to_state,
2234 PEN.
element(to_state, from_state)->lookup_penalty(
m_pos[from_pos_thresh]-
m_pos[from_pos], svm_value_part1)*0.5,
m_pos[from_pos],
m_pos[from_pos_thresh],
2235 PEN.
element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
2239 if (is_long_transition)
2241 ((
CPlifBase*) PEN.
element(to_state, from_state))->penalty_add_derivative(
m_pos[from_pos_thresh]-
m_pos[from_pos], svm_value_part1, 0.5) ;
2242 ((
CPlifBase*) PEN.
element(to_state, from_state))->penalty_add_derivative(
m_pos[to_pos]-
m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
2253 if (is_long_transition)
2261 for (int32_t k=0;k<num_intensities;k++)
2263 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2264 svm_value[j]=intensities[k];
2270 for (int32_t k=0;k<num_intensities;k++)
2272 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2273 svm_value[j]=intensities[k];
2278 SG_FREE(intensities);
2291 for (int32_t k=0;k<num_intensities;k++)
2293 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2294 svm_value[j]=intensities[k];
2299 SG_FREE(intensities);
2304 #ifdef DYNPROG_DEBUG 2305 SG_DEBUG(
"%i. scores[i]=%f\n", i, my_scores[i])
2309 for (int32_t k=0; k<max_num_signals; k++)
2311 if ((PEN_state_signals.
element(to_state,k)==NULL)&&(k==0))
2313 #ifdef DYNPROG_DEBUG 2314 SG_DEBUG(
"%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.
element(to_state, to_pos, k))
2316 my_scores[i] += seq_input.
element(to_state, to_pos, k) ;
2321 if (PEN_state_signals.
element(to_state, k)!=NULL)
2324 my_scores[i] += nscore ;
2325 #ifdef DYNPROG_DEBUG 2328 SG_PRINT(
"is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n",
2329 is_long_transition,
m_pos[from_pos], from_pos, from_state,
m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.
element(to_state, to_pos, k)) ;
2330 for (
int x=0; x<23; x++)
2332 for (
int i=-10; i<10; i++)
2347 #ifdef DYNPROG_DEBUG 2348 SG_DEBUG(
"%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.
element(to_state, to_pos, k), nscore, k)
2350 ((
CPlifBase*) PEN_state_signals.
element(to_state,k))->penalty_add_derivative(seq_input.
element(to_state, to_pos, k), svm_value, 1) ;
2358 total_score += my_scores[i] ;
2359 total_loss += my_losses[i] ;
2367 SG_FREE(svm_value_part1);
2368 SG_FREE(svm_value_part2);
2374 int32_t num_intensities = 0;
2378 int32_t num = m_num_probes_cum[type-1];
2379 while (*p_tiling_pos<to_pos)
2381 if (*p_tiling_pos>=from_pos)
2383 intensities[num_intensities] = *p_tiling_data;
2387 if (num>=m_num_probes_cum[type])
2389 last_pos = *p_tiling_pos;
2392 ASSERT(last_pos<*p_tiling_pos)
2394 return num_intensities;
2399 #ifdef DYNPROG_TIMING_DETAIL 2409 svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
2415 svm_values[i] = to_val-from_val ;
2421 int32_t intron_list_start = m_num_lin_feat_plifs_cum[
m_num_raw_data];
2424 for (int32_t i=intron_list_start; i<intron_list_end;i++)
2426 svm_values[i] = (
float64_t) (support[cnt]);
2437 svm_values[frame_plifs[1]] = 1e10;
2438 svm_values[frame_plifs[2]] = 1e10;
2439 int32_t global_frame = from_pos%3;
2440 int32_t row = ((global_frame+frame)%3)+4;
2443 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
2445 #ifdef DYNPROG_TIMING_DETAIL 2447 content_svm_values_time += MyTime.time_diff_sec() ;
virtual float64_t get_max_value() const =0
void set_array(T *p_array, int32_t p_num_elements, int32_t array_size)
CDynamicArray< float64_t > m_end_state_distribution_q_deriv
void set_loglevel(EMessageType level)
bool set_element(T e, int32_t idx1, int32_t idx2=0, int32_t idx3=0)
CDynamicArray< float64_t > m_segment_loss
CPlifMatrix * m_plif_matrices
float64_t get_p(T_STATES offset) const
CDynamicArray< int32_t > m_positions
void best_path_trans_deriv(int32_t *my_state_seq, int32_t *my_pos_seq, int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
CDynamicArray< float64_t > m_dict_weights
static int is_finite(double f)
checks whether a float is finite
static int32_t cum_num_words_default[5]
CDynamicArray< int32_t > m_segment_ids
void set_dict_weights(SGMatrix< float64_t > dictionary_weights)
void set_my_state_seq(int32_t *my_state_seq)
static float64_t ceil(float64_t d)
void set_plif_matrices(CPlifMatrix *pm)
static const float64_t INFTY
infinity
void create_word_string()
int32_t m_N
number of states
static void nmin(float64_t *output, T *index, int32_t size, int32_t n)
void set_observation_matrix(SGNDArray< float64_t > seq)
void set_gene_string(SGVector< char > genestr)
void set_orf_info(SGMatrix< int32_t > orf_info)
void get_path_scores(float64_t **my_scores, int32_t *seq_len)
CPlifBase ** get_plif_matrix()
CDynamicArray< int32_t > m_transition_matrix_a_id
transition matrix
float32_t get_segment_loss(int32_t from_pos, int32_t to_pos, int32_t segment_id)
static void translate_from_single_order(ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
CDynamicArray< int32_t > m_word_degree
int32_t * m_cum_num_words_array
SGVector< float64_t > get_scores()
int32_t * m_mod_words_array
CDynamicArray< float64_t > m_lin_feat
void compute_loss(int32_t *all_pos, int32_t len)
CPlifBase ** get_state_signals()
int32_t raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t *intensities, int32_t type)
int32_t m_num_intron_plifs
void best_path_set_segment_ids_mask(int32_t *segment_ids, float64_t *segment_mask, int32_t m)
void set_pos(SGVector< int32_t > pos)
virtual float64_t lookup_penalty(float64_t p_value, float64_t *svm_values) const =0
CDynamicArray< float64_t > m_initial_state_distribution_p_deriv
int32_t get_num_positions()
CDynProg(int32_t p_num_svms=8)
void set_const(const T &const_element)
CSparseFeatures< float64_t > * m_seq_sparse1
CSparseFeatures< float64_t > * m_seq_sparse2
int32_t * m_num_probes_cum
float32_t get_segment_loss_extend(int32_t from_pos, int32_t to_pos, int32_t segment_id)
void precompute_content_values()
int32_t * m_num_lin_feat_plifs_cum
CDynamicArray< float64_t > m_initial_state_distribution_p
initial distribution of states
CDynamicArray< int32_t > m_mod_words
void init_mod_words_array(SGMatrix< int32_t > p_mod_words_array)
void set_a(SGMatrix< float64_t > a)
CDynamicArray< float64_t > m_transition_matrix_a_deriv
void set_intron_list(CIntronList *intron_list, int32_t num_plifs)
Class SGObject is the base class of all shogun objects.
void set_q_vector(SGVector< float64_t > q)
CDynamicArray< int32_t > m_num_unique_words
CSGObject * element(int32_t idx1, int32_t idx2=0, int32_t idx3=0)
CDynamicArray< float64_t > m_transition_matrix_a
static int32_t mod_words_default[32]
void resize_lin_feat(int32_t num_new_feat)
void set_segment_ids(CDynamicArray< int32_t > *segment_ids)
void precompute_stop_codons()
CDynamicArray< bool > m_genestr_stop
void lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t *svm_values, int32_t frame)
CDynamicArray< int32_t > m_orf_info
float64_t lookup_penalty(float64_t p_value, float64_t *svm_values) const
SGMatrix< int32_t > get_positions()
int32_t * m_string_words_array
virtual bool uses_svm_values() const =0
CDynamicArray< int32_t > m_my_pos_seq
SGMatrix< int32_t > get_states()
CDynamicArray< int32_t > m_states
static int32_t frame_plifs[3]
CDynamicArray< int32_t > m_cum_num_words
void best_path_set_segment_loss(SGMatrix< float64_t > segment_loss)
CDynamicArray< int32_t > m_string_words
CDynamicArray< int32_t > m_pos
void set_content_type_array(SGMatrix< float64_t > seg_path)
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an a...
void precompute_tiling_plifs(CPlif **PEN, const int32_t *tiling_plif_ids, const int32_t num_tiling_plifs)
int32_t m_long_transition_threshold
CDynamicArray< float64_t > m_end_state_distribution_q
distribution of end-states
void set_a_trans_matrix(SGMatrix< float64_t > a_trans)
float64_t get_q(T_STATES offset) const
float64_t * m_raw_intensities
CDynamicArray< float64_t > m_my_losses
CDynamicArray< float64_t > m_segment_mask
CDynamicArray< char > m_genestr
void set_my_pos_seq(int32_t *my_pos_seq)
const T & element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const
CDynamicArray< float64_t > m_scores
all of classes and functions are contained in the shogun namespace
const T & get_element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const
CDynamicArray< float64_t > m_observation_matrix
CDynamicArray< bool > m_sign_words
void get_intron_support(int32_t *values, int32_t from_pos, int32_t to_pos)
CDynamicArray< int32_t > m_num_words
ST get_feature(int32_t num, int32_t index)
CDynamicArray< float64_t > m_my_scores
void set_p_vector(SGVector< float64_t > p)
void get_path_losses(float64_t **my_losses, int32_t *seq_len)
CIntronList * m_intron_list
int32_t get_use_svm() const
void init_tiling_data(int32_t *probe_pos, float64_t *intensities, const int32_t num_probes)
virtual void penalty_clear_derivative()=0
static bool sign_words_default[16]
static int32_t word_degree_default[4]
void set_num_states(int32_t N)
CSegmentLoss * m_seg_loss_obj
bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to)
static int32_t string_words_default[16]
static int32_t num_words_default[4]
CDynamicArray< int32_t > m_my_state_seq
void set_segment_mask(CDynamicArray< float64_t > *segment_mask)
void init_content_svm_value_array(const int32_t p_num_svms)
bool resize_array(int32_t ndim1, int32_t ndim2=1, int32_t ndim3=1)
void compute_nbest_paths(int32_t max_num_signals, bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences)
store plif arrays for all transitions in the model
void set_sparse_features(CSparseFeatures< float64_t > *seq_sparse1, CSparseFeatures< float64_t > *seq_sparse2)
void set_a_id(SGMatrix< int32_t > a)