1 /** 2 * Cumulative Probability Distribution Functions 3 * 4 * Copyright: Based on the CEPHES math library, which is 5 * Copyright (C) 1994 Stephen L. Moshier (moshier@world.std.com). 6 * License: BSD style: $(LICENSE) 7 * Authors: Stephen L. Moshier (original C code), Don Clugston 8 */ 9 10 /** 11 * Macros: 12 * NAN = $(RED NAN) 13 * SUP = <span style="vertical-align:super;font-size:smaller">$0</span> 14 * GAMMA = Γ 15 * INTEGRAL = ∫ 16 * INTEGRATE = $(BIG ∫<sub>$(SMALL $1)</sub><sup>$2</sup>) 17 * POWER = $1<sup>$2</sup> 18 * BIGSUM = $(BIG Σ <sup>$2</sup><sub>$(SMALL $1)</sub>) 19 * CHOOSE = $(BIG () <sup>$(SMALL $1)</sup><sub>$(SMALL $2)</sub> $(BIG )) 20 * TABLE_SV = <table border=1 cellpadding=4 cellspacing=0> 21 * <caption>Special Values</caption> 22 * $0</table> 23 * SVH = $(TR $(TH $1) $(TH $2)) 24 * SV = $(TR $(TD $1) $(TD $2)) 25 */ 26 27 module tango.math.Probability; 28 static import tango.math.ErrorFunction; 29 private import tango.math.GammaFunction; 30 private import tango.math.Math; 31 private import tango.math.IEEE; 32 33 34 /*** 35 Cumulative distribution function for the Normal distribution, and its complement. 36 37 The normal (or Gaussian, or bell-shaped) distribution is 38 defined as: 39 40 normalDist(x) = 1/$(SQRT) π $(INTEGRAL -$(INFINITY), x) exp( - $(POWER t, 2)/2) dt 41 = 0.5 + 0.5 * erf(x/sqrt(2)) 42 = 0.5 * erfc(- x/sqrt(2)) 43 44 Note that 45 normalDistribution(x) = 1 - normalDistribution(-x). 46 47 Accuracy: 48 Within a few bits of machine resolution over the entire 49 range. 50 51 References: 52 $(LINK http://www.netlib.org/cephes/ldoubdoc.html), 53 G. Marsaglia, "Evaluating the Normal Distribution", 54 Journal of Statistical Software <b>11</b>, (July 2004). 55 */ 56 real normalDistribution(real a) 57 { 58 return tango.math.ErrorFunction.normalDistributionImpl(a); 59 } 60 61 /** ditto */ 62 real normalDistributionCompl(real a) 63 { 64 return -tango.math.ErrorFunction.normalDistributionImpl(-a); 65 } 66 67 /****************************** 68 * Inverse of Normal distribution function 69 * 70 * Returns the argument, x, for which the area under the 71 * Normal probability density function (integrated from 72 * minus infinity to x) is equal to p. 73 * 74 * For small arguments 0 < p < exp(-2), the program computes 75 * z = sqrt( -2 log(p) ); then the approximation is 76 * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z) . 77 * For larger arguments, x/sqrt(2 pi) = w + w^3 R(w^2)/S(w^2)) , 78 * where w = p - 0.5 . 79 */ 80 real normalDistributionInv(real p) 81 { 82 return tango.math.ErrorFunction.normalDistributionInvImpl(p); 83 } 84 85 /** ditto */ 86 real normalDistributionComplInv(real p) 87 { 88 return -tango.math.ErrorFunction.normalDistributionInvImpl(-p); 89 } 90 91 debug(UnitTest) { 92 unittest { 93 assert(feqrel(normalDistributionInv(normalDistribution(0.1)),0.1L)>=real.mant_dig-4); 94 assert(feqrel(normalDistributionComplInv(normalDistributionCompl(0.1)),0.1L)>=real.mant_dig-4); 95 } 96 } 97 98 /** Student's t cumulative distribution function 99 * 100 * Computes the integral from minus infinity to t of the Student 101 * t distribution with integer nu > 0 degrees of freedom: 102 * 103 * $(GAMMA)( (nu+1)/2) / ( sqrt(nu π) $(GAMMA)(nu/2) ) * 104 * $(INTEGRATE -∞, t) $(POWER (1+$(POWER x, 2)/nu), -(nu+1)/2) dx 105 * 106 * Can be used to test whether the means of two normally distributed populations 107 * are equal. 108 * 109 * It is related to the incomplete beta integral: 110 * 1 - studentsDistribution(nu,t) = 0.5 * betaDistribution( nu/2, 1/2, z ) 111 * where 112 * z = nu/(nu + t<sup>2</sup>). 113 * 114 * For t < -1.6, this is the method of computation. For higher t, 115 * a direct method is derived from integration by parts. 116 * Since the function is symmetric about t=0, the area under the 117 * right tail of the density is found by calling the function 118 * with -t instead of t. 119 */ 120 real studentsTDistribution(int nu, real t) 121 in{ 122 assert(nu>0); 123 } 124 body{ 125 /* Based on code from Cephes Math Library Release 2.3: January, 1995 126 Copyright 1984, 1995 by Stephen L. Moshier 127 */ 128 129 if ( nu <= 0 ) return NaN(TANGO_NAN.STUDENTSDDISTRIBUTION_DOMAIN); // domain error -- or should it return 0? 130 if ( t == 0.0 ) return 0.5; 131 132 real rk, z, p; 133 134 if ( t < -1.6 ) { 135 rk = nu; 136 z = rk / (rk + t * t); 137 return 0.5L * betaIncomplete( 0.5L*rk, 0.5L, z ); 138 } 139 140 /* compute integral from -t to + t */ 141 142 rk = nu; /* degrees of freedom */ 143 144 real x; 145 if (t < 0) x = -t; else x = t; 146 z = 1.0L + ( x * x )/rk; 147 148 real f, tz; 149 int j; 150 151 if ( nu & 1) { 152 /* computation for odd nu */ 153 real xsqk = x/sqrt(rk); 154 p = atan( xsqk ); 155 if ( nu > 1 ) { 156 f = 1.0L; 157 tz = 1.0L; 158 j = 3; 159 while( (j<=(nu-2)) && ( (tz/f) > real.epsilon ) ) { 160 tz *= (j-1)/( z * j ); 161 f += tz; 162 j += 2; 163 } 164 p += f * xsqk/z; 165 } 166 p *= 2.0L/PI; 167 } else { 168 /* computation for even nu */ 169 f = 1.0L; 170 tz = 1.0L; 171 j = 2; 172 173 while ( ( j <= (nu-2) ) && ( (tz/f) > real.epsilon ) ) { 174 tz *= (j - 1)/( z * j ); 175 f += tz; 176 j += 2; 177 } 178 p = f * x/sqrt(z*rk); 179 } 180 if ( t < 0.0L ) 181 p = -p; /* note destruction of relative accuracy */ 182 183 p = 0.5L + 0.5L * p; 184 return p; 185 } 186 187 /** Inverse of Student's t distribution 188 * 189 * Given probability p and degrees of freedom nu, 190 * finds the argument t such that the one-sided 191 * studentsDistribution(nu,t) is equal to p. 192 * 193 * Params: 194 * nu = degrees of freedom. Must be >1 195 * p = probability. 0 < p < 1 196 */ 197 real studentsTDistributionInv(int nu, real p ) 198 in { 199 assert(nu>0); 200 assert(p>=0.0L && p<=1.0L); 201 } 202 body 203 { 204 if (p==0) return -real.infinity; 205 if (p==1) return real.infinity; 206 207 real rk, z; 208 rk = nu; 209 210 if ( p > 0.25L && p < 0.75L ) { 211 if ( p == 0.5L ) return 0; 212 z = 1.0L - 2.0L * p; 213 z = betaIncompleteInv( 0.5L, 0.5L*rk, fabs(z) ); 214 real t = sqrt( rk*z/(1.0L-z) ); 215 if( p < 0.5L ) 216 t = -t; 217 return t; 218 } 219 int rflg = -1; // sign of the result 220 if (p >= 0.5L) { 221 p = 1.0L - p; 222 rflg = 1; 223 } 224 z = betaIncompleteInv( 0.5L*rk, 0.5L, 2.0L*p ); 225 226 if (z<0) return rflg * real.infinity; 227 return rflg * sqrt( rk/z - rk ); 228 } 229 230 debug(UnitTest) { 231 unittest { 232 233 // There are simple forms for nu = 1 and nu = 2. 234 235 // if (nu == 1), tDistribution(x) = 0.5 + atan(x)/PI 236 // so tDistributionInv(p) = tan( PI * (p-0.5) ); 237 // nu==2: tDistribution(x) = 0.5 * (1 + x/ sqrt(2+x*x) ) 238 239 assert(studentsTDistribution(1, -0.4)== 0.5 + atan(-0.4)/PI); 240 assert(studentsTDistribution(2, 0.9) == 0.5L * (1 + 0.9L/sqrt(2.0L + 0.9*0.9)) ); 241 assert(studentsTDistribution(2, -5.4) == 0.5L * (1 - 5.4L/sqrt(2.0L + 5.4*5.4)) ); 242 243 // return true if a==b to given number of places. 244 bool isfeqabs(real a, real b, real diff) 245 { 246 return fabs(a-b) < diff; 247 } 248 249 // Check a few spot values with statsoft.com (Mathworld values are wrong!!) 250 // According to statsoft.com, studentsDistributionInv(10, 0.995)= 3.16927. 251 252 // The remaining values listed here are from Excel, and are unlikely to be accurate 253 // in the last decimal places. However, they are helpful as a sanity check. 254 255 // Microsoft Excel 2003 gives TINV(2*(1-0.995), 10) == 3.16927267160917 256 assert(isfeqabs(studentsTDistributionInv(10, 0.995), 3.169_272_67L, 0.000_000_005L)); 257 258 259 assert(isfeqabs(studentsTDistributionInv(8, 0.6), 0.261_921_096_769_043L,0.000_000_000_05L)); 260 // -TINV(2*0.4, 18) == -0.257123042655869 261 262 assert(isfeqabs(studentsTDistributionInv(18, 0.4), -0.257_123_042_655_869L, 0.000_000_000_05L)); 263 assert( feqrel(studentsTDistribution(18, studentsTDistributionInv(18, 0.4L)),0.4L) 264 > real.mant_dig-5 ); 265 assert( feqrel(studentsTDistribution(11, studentsTDistributionInv(11, 0.9L)),0.9L) 266 > real.mant_dig-2); 267 } 268 } 269 270 /** The F distribution, its complement, and inverse. 271 * 272 * The F density function (also known as Snedcor's density or the 273 * variance ratio density) is the density 274 * of x = (u1/df1)/(u2/df2), where u1 and u2 are random 275 * variables having $(POWER χ,2) distributions with df1 276 * and df2 degrees of freedom, respectively. 277 * 278 * fDistribution returns the area from zero to x under the F density 279 * function. The complementary function, 280 * fDistributionCompl, returns the area from x to ∞ under the F density function. 281 * 282 * The inverse of the complemented F distribution, 283 * fDistributionComplInv, finds the argument x such that the integral 284 * from x to infinity of the F density is equal to the given probability y. 285 * 286 * Can be used to test whether the means of multiple normally distributed 287 * populations, all with the same standard deviation, are equal; 288 * or to test that the standard deviations of two normally distributed 289 * populations are equal. 290 * 291 * Params: 292 * df1 = Degrees of freedom of the first variable. Must be >= 1 293 * df2 = Degrees of freedom of the second variable. Must be >= 1 294 * x = Must be >= 0 295 */ 296 real fDistribution(int df1, int df2, real x) 297 in { 298 assert(df1>=1 && df2>=1); 299 assert(x>=0); 300 } 301 body{ 302 real a = cast(real)(df1); 303 real b = cast(real)(df2); 304 real w = a * x; 305 w = w/(b + w); 306 return betaIncomplete(0.5L*a, 0.5L*b, w); 307 } 308 309 /** ditto */ 310 real fDistributionCompl(int df1, int df2, real x) 311 in { 312 assert(df1>=1 && df2>=1); 313 assert(x>=0); 314 } 315 body{ 316 real a = cast(real)(df1); 317 real b = cast(real)(df2); 318 real w = b / (b + a * x); 319 return betaIncomplete( 0.5L*b, 0.5L*a, w ); 320 } 321 322 /* 323 * Inverse of complemented F distribution 324 * 325 * Finds the F density argument x such that the integral 326 * from x to infinity of the F density is equal to the 327 * given probability p. 328 * 329 * This is accomplished using the inverse beta integral 330 * function and the relations 331 * 332 * z = betaIncompleteInv( df2/2, df1/2, p ), 333 * x = df2 (1-z) / (df1 z). 334 * 335 * Note that the following relations hold for the inverse of 336 * the uncomplemented F distribution: 337 * 338 * z = betaIncompleteInv( df1/2, df2/2, p ), 339 * x = df2 z / (df1 (1-z)). 340 */ 341 342 /** ditto */ 343 real fDistributionComplInv(int df1, int df2, real p ) 344 in { 345 assert(df1>=1 && df2>=1); 346 assert(p>=0 && p<=1.0); 347 } 348 body{ 349 real a = df1; 350 real b = df2; 351 /* Compute probability for x = 0.5. */ 352 real w = betaIncomplete( 0.5L*b, 0.5L*a, 0.5L ); 353 /* If that is greater than p, then the solution w < .5. 354 Otherwise, solve at 1-p to remove cancellation in (b - b*w). */ 355 if ( w > p || p < 0.001L) { 356 w = betaIncompleteInv( 0.5L*b, 0.5L*a, p ); 357 return (b - b*w)/(a*w); 358 } else { 359 w = betaIncompleteInv( 0.5L*a, 0.5L*b, 1.0L - p ); 360 return b*w/(a*(1.0L-w)); 361 } 362 } 363 364 debug(UnitTest) { 365 unittest { 366 // fDistCompl(df1, df2, x) = Excel's FDIST(x, df1, df2) 367 assert(fabs(fDistributionCompl(6, 4, 16.5) - 0.00858719177897249L)< 0.0000000000005L); 368 assert(fabs((1-fDistribution(12, 23, 0.1)) - 0.99990562845505L)< 0.0000000000005L); 369 assert(fabs(fDistributionComplInv(8, 34, 0.2) - 1.48267037661408L)< 0.0000000005L); 370 assert(fabs(fDistributionComplInv(4, 16, 0.008) - 5.043_537_593_48596L)< 0.0000000005L); 371 // Regression test: This one used to fail because of a bug in the definition of MINLOG. 372 assert(feqrel(fDistributionCompl(4, 16, fDistributionComplInv(4,16, 0.008)), 0.008L)>=real.mant_dig-3); 373 } 374 } 375 376 /** $(POWER χ,2) cumulative distribution function and its complement. 377 * 378 * Returns the area under the left hand tail (from 0 to x) 379 * of the Chi square probability density function with 380 * v degrees of freedom. The complement returns the area under 381 * the right hand tail (from x to ∞). 382 * 383 * chiSqrDistribution(x | v) = ($(INTEGRATE 0, x) 384 * $(POWER t, v/2-1) $(POWER e, -t/2) dt ) 385 * / $(POWER 2, v/2) $(GAMMA)(v/2) 386 * 387 * chiSqrDistributionCompl(x | v) = ($(INTEGRATE x, ∞) 388 * $(POWER t, v/2-1) $(POWER e, -t/2) dt ) 389 * / $(POWER 2, v/2) $(GAMMA)(v/2) 390 * 391 * Params: 392 * v = degrees of freedom. Must be positive. 393 * x = the $(POWER χ,2) variable. Must be positive. 394 * 395 */ 396 real chiSqrDistribution(real v, real x) 397 in { 398 assert(x>=0); 399 assert(v>=1.0); 400 } 401 body{ 402 return gammaIncomplete( 0.5*v, 0.5*x); 403 } 404 405 /** ditto */ 406 real chiSqrDistributionCompl(real v, real x) 407 in { 408 assert(x>=0); 409 assert(v>=1.0); 410 } 411 body{ 412 return gammaIncompleteCompl( 0.5L*v, 0.5L*x ); 413 } 414 415 /** 416 * Inverse of complemented $(POWER χ, 2) distribution 417 * 418 * Finds the $(POWER χ, 2) argument x such that the integral 419 * from x to ∞ of the $(POWER χ, 2) density is equal 420 * to the given cumulative probability p. 421 * 422 * Params: 423 * p = Cumulative probability. 0<= p <=1. 424 * v = Degrees of freedom. Must be positive. 425 * 426 */ 427 real chiSqrDistributionComplInv(real v, real p) 428 in { 429 assert(p>=0 && p<=1.0L); 430 assert(v>=1.0L); 431 } 432 body 433 { 434 return 2.0 * gammaIncompleteComplInv( 0.5*v, p); 435 } 436 437 debug(UnitTest) { 438 unittest { 439 assert(feqrel(chiSqrDistributionCompl(3.5L, chiSqrDistributionComplInv(3.5L, 0.1L)), 0.1L)>=real.mant_dig-5); 440 assert(chiSqrDistribution(19.02L, 0.4L) + chiSqrDistributionCompl(19.02L, 0.4L) ==1.0L); 441 } 442 } 443 444 /** 445 * The Γ distribution and its complement 446 * 447 * The Γ distribution is defined as the integral from 0 to x of the 448 * gamma probability density function. The complementary function returns the 449 * integral from x to ∞ 450 * 451 * gammaDistribution = ($(INTEGRATE 0, x) $(POWER t, b-1)$(POWER e, -at) dt) $(POWER a, b)/Γ(b) 452 * 453 * x must be greater than 0. 454 */ 455 real gammaDistribution(real a, real b, real x) 456 in { 457 assert(x>=0); 458 } 459 body { 460 return gammaIncomplete(b, a*x); 461 } 462 463 /** ditto */ 464 real gammaDistributionCompl(real a, real b, real x ) 465 in { 466 assert(x>=0); 467 } 468 body { 469 return gammaIncompleteCompl( b, a * x ); 470 } 471 472 debug(UnitTest) { 473 unittest { 474 assert(gammaDistribution(7,3,0.18)+gammaDistributionCompl(7,3,0.18)==1); 475 } 476 } 477 478 /********************** 479 * Beta distribution and its inverse 480 * 481 * Returns the incomplete beta integral of the arguments, evaluated 482 * from zero to x. The function is defined as 483 * 484 * betaDistribution = Γ(a+b)/(Γ(a) Γ(b)) * 485 * $(INTEGRATE 0, x) $(POWER t, a-1)$(POWER (1-t),b-1) dt 486 * 487 * The domain of definition is 0 <= x <= 1. In this 488 * implementation a and b are restricted to positive values. 489 * The integral from x to 1 may be obtained by the symmetry 490 * relation 491 * 492 * betaDistributionCompl(a, b, x ) = betaDistribution( b, a, 1-x ) 493 * 494 * The integral is evaluated by a continued fraction expansion 495 * or, when b*x is small, by a power series. 496 * 497 * The inverse finds the value of x for which betaDistribution(a,b,x) - y = 0 498 */ 499 real betaDistribution(real a, real b, real x ) 500 { 501 return betaIncomplete(a, b, x ); 502 } 503 504 /** ditto */ 505 real betaDistributionCompl(real a, real b, real x) 506 { 507 return betaIncomplete(b, a, 1-x); 508 } 509 510 /** ditto */ 511 real betaDistributionInv(real a, real b, real y) 512 { 513 return betaIncompleteInv(a, b, y); 514 } 515 516 /** ditto */ 517 real betaDistributionComplInv(real a, real b, real y) 518 { 519 return 1-betaIncompleteInv(b, a, y); 520 } 521 522 debug(UnitTest) { 523 unittest { 524 assert(feqrel(betaDistributionInv(2, 6, betaDistribution(2,6, 0.7L)),0.7L)>=real.mant_dig-3); 525 assert(feqrel(betaDistributionComplInv(1.3, 8, betaDistributionCompl(1.3,8, 0.01L)),0.01L)>=real.mant_dig-4); 526 } 527 } 528 529 /** 530 * The Poisson distribution, its complement, and inverse 531 * 532 * k is the number of events. m is the mean. 533 * The Poisson distribution is defined as the sum of the first k terms of 534 * the Poisson density function. 535 * The complement returns the sum of the terms k+1 to ∞. 536 * 537 * poissonDistribution = $(BIGSUM j=0, k) $(POWER e, -m) $(POWER m, j)/j! 538 * 539 * poissonDistributionCompl = $(BIGSUM j=k+1, ∞) $(POWER e, -m) $(POWER m, j)/j! 540 * 541 * The terms are not summed directly; instead the incomplete 542 * gamma integral is employed, according to the relation 543 * 544 * y = poissonDistribution( k, m ) = gammaIncompleteCompl( k+1, m ). 545 * 546 * The arguments must both be positive. 547 */ 548 real poissonDistribution(int k, real m ) 549 in { 550 assert(k>=0); 551 assert(m>0); 552 } 553 body { 554 return gammaIncompleteCompl( k+1.0, m ); 555 } 556 557 /** ditto */ 558 real poissonDistributionCompl(int k, real m ) 559 in { 560 assert(k>=0); 561 assert(m>0); 562 } 563 body { 564 return gammaIncomplete( k+1.0, m ); 565 } 566 567 /** ditto */ 568 real poissonDistributionInv( int k, real p ) 569 in { 570 assert(k>=0); 571 assert(p>=0.0 && p<=1.0); 572 } 573 body { 574 return gammaIncompleteComplInv(k+1, p); 575 } 576 577 debug(UnitTest) { 578 unittest { 579 // = Excel's POISSON(k, m, TRUE) 580 assert( fabs(poissonDistribution(5, 6.3) 581 - 0.398771730072867L) < 0.000000000000005L); 582 assert( feqrel(poissonDistributionInv(8, poissonDistribution(8, 2.7e3L)), 2.7e3L)>=real.mant_dig-2); 583 assert( poissonDistribution(2, 8.4e-5) + poissonDistributionCompl(2, 8.4e-5) == 1.0L); 584 } 585 } 586 587 /*********************************** 588 * Binomial distribution and complemented binomial distribution 589 * 590 * The binomial distribution is defined as the sum of the terms 0 through k 591 * of the Binomial probability density. 592 * The complement returns the sum of the terms k+1 through n. 593 * 594 binomialDistribution = $(BIGSUM j=0, k) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j) 595 596 binomialDistributionCompl = $(BIGSUM j=k+1, n) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j) 597 * 598 * The terms are not summed directly; instead the incomplete 599 * beta integral is employed, according to the formula 600 * 601 * y = binomialDistribution( k, n, p ) = betaDistribution( n-k, k+1, 1-p ). 602 * 603 * The arguments must be positive, with p ranging from 0 to 1, and k<=n. 604 */ 605 real binomialDistribution(int k, int n, real p ) 606 in { 607 assert(p>=0 && p<=1.0); // domain error 608 assert(k>=0 && k<=n); 609 } 610 body{ 611 real dk, dn, q; 612 if( k == n ) 613 return 1.0L; 614 615 q = 1.0L - p; 616 dn = n - k; 617 if ( k == 0 ) { 618 return pow( q, dn ); 619 } else { 620 return betaIncomplete( dn, k + 1, q ); 621 } 622 } 623 624 debug(UnitTest) { 625 unittest { 626 // = Excel's BINOMDIST(k, n, p, TRUE) 627 assert( fabs(binomialDistribution(8, 12, 0.5) 628 - 0.927001953125L) < 0.0000000000005L); 629 assert( fabs(binomialDistribution(0, 3, 0.008L) 630 - 0.976191488L) < 0.00000000005L); 631 assert(binomialDistribution(7,7, 0.3)==1.0); 632 } 633 } 634 635 /** ditto */ 636 real binomialDistributionCompl(int k, int n, real p ) 637 in { 638 assert(p>=0 && p<=1.0); // domain error 639 assert(k>=0 && k<=n); 640 } 641 body{ 642 if ( k == n ) { 643 return 0; 644 } 645 real dn = n - k; 646 if ( k == 0 ) { 647 if ( p < .01L ) 648 return -expm1( dn * log1p(-p) ); 649 else 650 return 1.0L - pow( 1.0L-p, dn ); 651 } else { 652 return betaIncomplete( k+1, dn, p ); 653 } 654 } 655 656 debug(UnitTest){ 657 unittest { 658 // = Excel's (1 - BINOMDIST(k, n, p, TRUE)) 659 assert( fabs(1.0L-binomialDistributionCompl(0, 15, 0.003) 660 - 0.955932824838906L) < 0.0000000000000005L); 661 assert( fabs(1.0L-binomialDistributionCompl(0, 25, 0.2) 662 - 0.00377789318629572L) < 0.000000000000000005L); 663 assert( fabs(1.0L-binomialDistributionCompl(8, 12, 0.5) 664 - 0.927001953125L) < 0.00000000000005L); 665 assert(binomialDistributionCompl(7,7, 0.3)==0.0); 666 } 667 } 668 669 /** Inverse binomial distribution 670 * 671 * Finds the event probability p such that the sum of the 672 * terms 0 through k of the Binomial probability density 673 * is equal to the given cumulative probability y. 674 * 675 * This is accomplished using the inverse beta integral 676 * function and the relation 677 * 678 * 1 - p = betaDistributionInv( n-k, k+1, y ). 679 * 680 * The arguments must be positive, with 0 <= y <= 1, and k <= n. 681 */ 682 real binomialDistributionInv( int k, int n, real y ) 683 in { 684 assert(y>=0 && y<=1.0); // domain error 685 assert(k>=0 && k<=n); 686 } 687 body{ 688 real dk, p; 689 real dn = n - k; 690 if ( k == 0 ) { 691 if( y > 0.8L ) 692 p = -expm1( log1p(y-1.0L) / dn ); 693 else 694 p = 1.0L - pow( y, 1.0L/dn ); 695 } else { 696 dk = k + 1; 697 p = betaIncomplete( dn, dk, y ); 698 if( p > 0.5 ) 699 p = betaIncompleteInv( dk, dn, 1.0L-y ); 700 else 701 p = 1.0 - betaIncompleteInv( dn, dk, y ); 702 } 703 return p; 704 } 705 706 debug(UnitTest){ 707 unittest { 708 real w = binomialDistribution(9, 15, 0.318L); 709 assert(feqrel(binomialDistributionInv(9, 15, w), 0.318L)>=real.mant_dig-3); 710 w = binomialDistribution(5, 35, 0.718L); 711 assert(feqrel(binomialDistributionInv(5, 35, w), 0.718L)>=real.mant_dig-3); 712 w = binomialDistribution(0, 24, 0.637L); 713 assert(feqrel(binomialDistributionInv(0, 24, w), 0.637L)>=real.mant_dig-3); 714 w = binomialDistributionInv(0, 59, 0.962L); 715 assert(feqrel(binomialDistribution(0, 59, w), 0.962L)>=real.mant_dig-5); 716 } 717 } 718 719 /** Negative binomial distribution and its inverse 720 * 721 * Returns the sum of the terms 0 through k of the negative 722 * binomial distribution: 723 * 724 * $(BIGSUM j=0, k) $(CHOOSE n+j-1, j-1) $(POWER p, n) $(POWER (1-p), j) 725 * 726 * In a sequence of Bernoulli trials, this is the probability 727 * that k or fewer failures precede the n-th success. 728 * 729 * The arguments must be positive, with 0 < p < 1 and r>0. 730 * 731 * The inverse finds the argument y such 732 * that negativeBinomialDistribution(k,n,y) is equal to p. 733 * 734 * The Geometric Distribution is a special case of the negative binomial 735 * distribution. 736 * ----------------------- 737 * geometricDistribution(k, p) = negativeBinomialDistribution(k, 1, p); 738 * ----------------------- 739 * References: 740 * $(LINK http://mathworld.wolfram.com/NegativeBinomialDistribution.html) 741 */ 742 743 real negativeBinomialDistribution(int k, int n, real p ) 744 in { 745 assert(p>=0 && p<=1.0); // domain error 746 assert(k>=0); 747 } 748 body{ 749 if ( k == 0 ) return pow( p, n ); 750 return betaIncomplete( n, k + 1, p ); 751 } 752 753 /** ditto */ 754 real negativeBinomialDistributionInv(int k, int n, real p ) 755 in { 756 assert(p>=0 && p<=1.0); // domain error 757 assert(k>=0); 758 } 759 body{ 760 return betaIncompleteInv(n, k + 1, p); 761 } 762 763 debug(UnitTest) { 764 unittest { 765 // Value obtained by sum of terms of MS Excel 2003's NEGBINOMDIST. 766 assert( fabs(negativeBinomialDistribution(10, 20, 0.2) - 3.830_52E-08)< 0.000_005e-08); 767 assert(feqrel(negativeBinomialDistributionInv(14, 208, negativeBinomialDistribution(14, 208, 1e-4L)), 1e-4L)>=real.mant_dig-3); 768 } 769 }