21 #include <shogun/lib/external/cdflib.hpp> 24 using namespace Eigen;
30 REQUIRE(values.
vlen>1,
"Number of observations (%d) needs to be at least 1.\n",
37 sum_squared_diff+=CMath::pow(values.
vector[i]-mean, 2);
39 return sum_squared_diff/(values.
vlen-1);
58 result[j]+=values(i,j);
70 result[j]+=values(j,i);
98 result[j]+=CMath::pow(values(i,j)-mean[j], 2);
110 result[j]+=CMath::pow(values(j,i)-mean[j], 2);
121 return CMath::sqrt(variance(values));
129 var[i]=CMath::sqrt(var[i]);
139 SG_SDEBUG(
"%d observations in %d dimensions\n", N, D)
141 REQUIRE(N>1,
"Number of observations (%d) must be at least 2.\n", N);
142 REQUIRE(D>0,
"Number of dimensions (%d) must be at least 1.\n", D);
150 int64_t num_elements = N*D;
156 eigen_centered.colwise() -= eigen_centered.rowwise().mean();
159 SG_SDEBUG(
"Computing squared differences\n");
162 eigen_cov = (eigen_centered * eigen_centered.adjoint()) /
double(N - 1);
174 for (int32_t i=0; i<len; i++)
177 v.
vector[i]=fishers_exact_test_for_2x3_table(table);
182 float64_t CStatistics::fishers_exact_test_for_2x3_table(
197 int32_t x_len=2*3*CMath::sq(
CMath::max(m, m_len));
202 for (int32_t i=0; i<3+2; i++)
203 log_nom+=lgamma(m[i]+1);
204 log_nom-=lgamma(n+1.0);
209 for (int32_t i=0; i<3*2; i++)
217 int32_t dim1=CMath::min(m[0], m[2]);
221 for (int32_t k=0; k<=dim1; k++)
224 l<=CMath::min(m[0]-k, m[3]); l++)
226 x[0+0*2+counter*2*3]=k;
227 x[0+1*2+counter*2*3]=l;
228 x[0+2*2+counter*2*3]=m[0]-x[0+0*2+counter*2*3]-x[0+1*2+counter*2*3];
229 x[1+0*2+counter*2*3]=m[2]-x[0+0*2+counter*2*3];
230 x[1+1*2+counter*2*3]=m[3]-x[0+1*2+counter*2*3];
231 x[1+2*2+counter*2*3]=m[4]-x[0+2*2+counter*2*3];
238 #ifdef DEBUG_FISHER_TABLE 243 SG_SPRINT(
"prob_table_log=%.18Lg\n", prob_table_log)
244 SG_SPRINT(
"log_denomf=%.18g\n", log_denomf)
245 SG_SPRINT(
"log_denom=%.18Lg\n", log_denom)
247 display_vector(m, m_len,
"marginals");
248 display_vector(x, 2*3*counter,
"x");
249 #endif // DEBUG_FISHER_TABLE 254 for (int32_t k=0; k<counter; k++)
256 for (int32_t j=0; j<3; j++)
258 for (int32_t i=0; i<2; i++)
259 log_denom_vec[k]+=lgammal(x[i+j*2+k*2*3]+1.0);
263 for (int32_t i=0; i<counter; i++)
264 log_denom_vec[i]=log_nom-log_denom_vec[i];
266 #ifdef DEBUG_FISHER_TABLE 267 display_vector(log_denom_vec, counter,
"log_denom_vec");
268 #endif // DEBUG_FISHER_TABLE 271 for (int32_t i=0; i<counter; i++)
273 if (log_denom_vec[i]<=prob_table_log)
274 nonrand_p=CMath::logarithmic_sum(nonrand_p, log_denom_vec[i]);
277 #ifdef DEBUG_FISHER_TABLE 278 SG_SPRINT(
"nonrand_p=%.18g\n", nonrand_p)
279 SG_SPRINT(
"exp_nonrand_p=%.18g\n", CMath::exp(nonrand_p))
280 #endif // DEBUG_FISHER_TABLE 281 nonrand_p=CMath::exp(nonrand_p);
283 SG_FREE(log_denom_vec);
294 for (int32_t i=0; i<len; i++)
295 for (int32_t j=0; j<len; j++)
296 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
305 for (int32_t i=0; i<len; i++)
306 e+=exp(p[i])*(p[i]-q[i]);
315 for (int32_t i=0; i<len; i++)
324 "sample size should be less than number of indices\n");
325 int32_t* idxs=SG_MALLOC(int32_t,N);
327 int32_t* permuted_idxs=SG_MALLOC(int32_t,sample_size);
332 for (i=0; i<sample_size; i++)
333 permuted_idxs[i]=idxs[i];
334 for (i=sample_size; i<N; i++)
336 rnd=CMath::random(1, i);
338 permuted_idxs[rnd]=idxs[i];
343 CMath::qsort(result);
350 REQUIRE(p>=0,
"p (%f); must be greater or equal to 0.\n", p);
351 REQUIRE(p<=1,
"p (%f); must be greater or equal to 1.\n", p);
352 REQUIRE(std_deviation>0,
"Standard deviation (%f); must be positive\n",
362 cdfnor(&which, &p, &q, &output_x, &mean, &std_deviation, &output_status, &output_bound);
364 if (output_status!=0)
365 SG_SERROR(
"Error %d while calling cdflib::cdfnor\n", output_status);
375 REQUIRE(x>=0,
"x (%f) has to be greater or equal to 0.\n", x);
376 REQUIRE(k>0,
"Degrees of freedom (%f) has to be positive.\n", k);
386 cdfchi(&which, &output_p, &output_q, &x, &df, &output_status, &output_bound);
388 if (output_status!=0)
389 SG_SERROR(
"Error %d while calling cdflib::cdfchi\n", output_status);
396 REQUIRE(x>=0,
"x (%f) has to be greater or equal to 0.\n", x);
397 REQUIRE(a>=0,
"a (%f) has to be greater or equal to 0.\n", a);
398 REQUIRE(b>=0,
"b (%f) has to be greater or equal to 0.\n", b);
407 int output_error_code;
409 cdfgam(&which, &output_p, &output_q, &x, &shape, &scale, &output_error_code, &output_bound);
411 if (output_error_code!=0)
412 SG_SERROR(
"Error %d while calling cdflib::cdfgam\n", output_error_code);
425 const float64_t sqrt_of_2=1.41421356237309514547;
426 const float64_t log_of_2=0.69314718055994528623;
427 const float64_t sqrt_of_pi=1.77245385090551588192;
439 -0.01621575378835404,
441 -0.001829764677455021,
442 2.0*(1.0-CMath::PI/3.0),
451 float64_t lp0 = -x/(sqrt_of_2*sqrt_of_pi);
452 for (
index_t i=0; i<c_len; i++)
453 f=lp0*(c_array[i]+f);
454 return -2.0*f-log_of_2;
456 else if (x<ERFC_CASE2)
457 return CMath::log(erfc8_weighted_sum(x))-log_of_2-x*x*0.5;
460 return CMath::log(normal_cdf(x));
467 const float64_t sqrt_of_2=1.41421356237309514547;
471 0.5641895835477550741253201704,
472 1.275366644729965952479585264,
473 5.019049726784267463450058,
474 6.1602098531096305440906,
475 7.409740605964741794425,
476 2.97886562639399288862
482 2.260528520767326969591866945,
483 9.396034016235054150430579648,
484 12.0489519278551290360340491,
485 17.08144074746600431571095,
486 9.608965327192787870698,
487 3.3690752069827527677
495 num=-x*num/sqrt_of_2+P[i];
501 den=-x*den/sqrt_of_2+Q[i];
509 return 0.5*(erfc(-x*M_SQRT1_2/std_dev));
515 REQUIRE(p>=0,
"p (%f) has to be greater or equal to 0.\n", p);
516 REQUIRE(a>=0,
"a (%f) has to be greater or equal to 0.\n", a);
517 REQUIRE(b>=0,
"b (%f) has to be greater or equal to 0.\n", b);
525 int output_error_code=0;
528 cdfgam(&which, &p, &q, &output_x, &shape, &scale, &output_error_code, &output_bound);
530 if (output_error_code!=0)
531 SG_SERROR(
"Error %d while calling cdflib::beta_inc\n", output_error_code);
538 REQUIRE(x>=0,
"x (%f) has to be greater or equal to 0.\n", x);
539 REQUIRE(d1>0,
"d1 (%f) has to be positive.\n", d1);
540 REQUIRE(d2>0,
"d2 (%f) has to be positive.\n", d2);
547 int output_error_code;
549 cdff(&which, &output_p, &output_q, &x, &d1, &d2, &output_error_code, &output_bound);
551 if (output_error_code!=0)
552 SG_SERROR(
"Error %d while calling cdflib::cdff\n", output_error_code);
565 result=CMath::PI/CMath::tan(CMath::PI*x);
581 0.04166666666666666667,
582 -0.00729166666666666667,
583 0.00384424603174603175,
584 -0.00413411458333333333,
585 0.00756096117424242424,
586 -0.02108249687595390720,
587 0.08332316080729166666,
588 -0.44324627670587277880,
589 3.05393103044765369366,
590 -26.45616165999210241989};
599 result+=coeff[i]*power;
608 REQUIRE(eigen_A.rows()==eigen_A.cols(),
609 "Input matrix should be a sqaure matrix row(%d) col(%d)\n",
610 eigen_A.rows(), eigen_A.cols());
612 PartialPivLU<MatrixXd> lu(eigen_A);
613 VectorXd tmp(eigen_A.rows());
615 for (
index_t idx=0; idx<tmp.rows(); idx++)
618 VectorXd p=lu.permutationP()*tmp;
621 for (
index_t idx=0; idx<p.rows(); idx++)
637 VectorXd u=lu.matrixLU().diagonal();
640 for (
index_t idx=0; idx<u.rows(); idx++)
654 result=u.array().abs().log().sum();
672 VectorXd diag = l.diagonal();
674 for( int32_t i = 0; i < diag.rows(); ++i ) {
675 retval += log(diag(i));
684 typedef SparseMatrix<float64_t> MatrixType;
687 SimplicialLLT<MatrixType> llt;
691 MatrixType L=llt.matrixL();
695 for(
index_t i=0; i<M.rows(); ++i )
696 retval+=log(L.coeff(i,i));
705 REQUIRE(cov.
num_rows>0,
"Number of covariance rows must be positive!\n");
706 REQUIRE(cov.
num_cols>0,
"Number of covariance cols must be positive!\n");
711 int32_t dim=mean.
vlen;
717 for( int32_t j=0; j<N; ++j )
718 for( int32_t i=0; i<dim; ++i )
719 S(i,j)=CMath::randn_double();
726 if( precision_matrix )
737 if( !precision_matrix )
746 for( int32_t i=0; i<N; ++i )
756 "CStatistics::sample_from_gaussian(): \ 757 Number of covariance rows must be positive!\n");
759 "CStatistics::sample_from_gaussian(): \ 760 Number of covariance cols must be positive!\n");
762 "CStatistics::sample_from_gaussian(): \ 763 Covariance is not initialized!\n");
765 "CStatistics::sample_from_gaussian(): \ 766 Covariance should be square matrix!\n");
768 "CStatistics::sample_from_gaussian(): \ 769 Mean and covariance dimension mismatch!\n");
771 int32_t dim=mean.
vlen;
774 typedef SparseMatrix<float64_t> MatrixType;
777 SimplicialLLT<MatrixType> llt;
781 for( int32_t j=0; j<N; ++j )
782 for( int32_t i=0; i<dim; ++i )
783 S(i,j)=CMath::randn_double();
789 MatrixType LP=llt.matrixL();
790 MatrixType UP=llt.matrixU();
794 if( precision_matrix )
797 SimplicialLLT<MatrixType> lltUP;
808 s=llt.permutationPinv()*s;
813 for( int32_t i=0; i<N; ++i )
822 SG_SDEBUG(
"entering CStatistics::fit_sigmoid()\n")
824 REQUIRE(scores.
vector,
"CStatistics::fit_sigmoid() requires " 830 SG_SDEBUG(
"counting number of positive and negative labels\n")
840 SG_SDEBUG(
"%d pos; %d neg\n", prior1, prior0)
854 float64_t hiTarget=(prior1+1.0)/(prior1+2.0);
859 for (
index_t i=0; i<length; ++i)
870 float64_t b=CMath::log((prior0+1.0)/(prior1+1.0));
873 for (
index_t i=0; i<length; ++i)
877 fval+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB));
879 fval+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB));
885 for (it=0; it<maxiter; ++it)
887 SG_SDEBUG(
"Iteration %d, a=%f, b=%f, fval=%f\n", it, a, b, fval)
896 for (
index_t i=0; i<length; ++i)
903 p=CMath::exp(-fApB)/(1.0+CMath::exp(-fApB));
904 q=1.0/(1.0+CMath::exp(-fApB));
908 p=1.0/(1.0+CMath::exp(fApB));
909 q=CMath::exp(fApB)/(1.0+CMath::exp(fApB));
913 h11+=scores[i]*scores[i]*d2;
922 if (CMath::abs(g1)<eps && CMath::abs(g2)<eps)
934 while (stepsize>=minstep)
941 for (
index_t i=0; i<length; ++i)
945 newf+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB));
947 newf+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB));
951 if (newf<fval+0.0001*stepsize*gd)
959 stepsize=stepsize/2.0;
962 if (stepsize<minstep)
964 SG_SWARNING(
"CStatistics::fit_sigmoid(): line search fails, A=%f, " 965 "B=%f, g1=%f, g2=%f, dA=%f, dB=%f, gd=%f\n",
966 a, b, g1, g2, dA, dB, gd);
972 SG_SWARNING(
"CStatistics::fit_sigmoid(): reaching maximal iterations," 973 " g1=%f, g2=%f\n", g1, g2);
976 SG_SDEBUG(
"fitted sigmoid: a=%f, b=%f\n", a, b)
982 SG_SDEBUG(
"leaving CStatistics::fit_sigmoid()\n")
986 const float64_t CStatistics::ERFC_CASE1=0.0492;
988 const float64_t CStatistics::ERFC_CASE2=-11.3137;
void scale(SGVector< T > &a, SGVector< T > &result, T alpha=1)
SGSparseVector< T > * sparse_matrix
array of sparse vectors of size num_vectors
index_t num_vectors
total number of vectors
std::enable_if<!std::is_same< T, complex128_t >::value, float64_t >::type mean(const Container< T > &a)
index_t num_features
total number of features
This class contains some utilities for Eigen3 Sparse Matrix integration with shogun. Currently it provides a method for converting SGSparseMatrix to Eigen3 SparseMatrix.
all of classes and functions are contained in the shogun namespace
T sum(const Container< T > &a, bool no_diag=false)
T max(const Container< T > &a)