Changeset View
Standalone View
src/backend/hypothesisTest/HypothesisTest.cpp
Context not available. | |||||
47 | #include <gsl/gsl_cdf.h> | 47 | #include <gsl/gsl_cdf.h> | ||
---|---|---|---|---|---|
48 | #include <gsl/gsl_math.h> | 48 | #include <gsl/gsl_math.h> | ||
49 | 49 | | |||
50 | #include <math.h> | | |||
51 | | ||||
52 | extern "C" { | 50 | extern "C" { | ||
53 | #include "backend/nsl/nsl_stats.h" | 51 | #include "backend/nsl/nsl_stats.h" | ||
54 | } | 52 | } | ||
Context not available. | |||||
106 | 104 | | |||
107 | void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) { | 105 | void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) { | ||
108 | d->tailType = test.tail; | 106 | d->tailType = test.tail; | ||
107 | d->pValue.clear(); | ||||
108 | d->statisticValue.clear(); | ||||
109 | d->statsTable = ""; | ||||
110 | for (int i = 0; i < 10; i++) | ||||
111 | d->resultLine[i]->clear(); | ||||
112 | | ||||
109 | switch (test.subtype) { | 113 | switch (test.subtype) { | ||
110 | case HypothesisTest::Test::SubType::TwoSampleIndependent: { | 114 | case HypothesisTest::Test::SubType::TwoSampleIndependent: { | ||
111 | d->currTestName = "<h2>" + i18n("Two Sample Independent Test") + "</h2>"; | 115 | d->currTestName = "<h2>" + i18n("Two Sample Independent Test") + "</h2>"; | ||
Context not available. | |||||
141 | void HypothesisTest::performLeveneTest(bool categoricalVariable) { | 145 | void HypothesisTest::performLeveneTest(bool categoricalVariable) { | ||
142 | d->currTestName = "<h2>" + i18n("Levene Test for Equality of Variance") + "</h2>"; | 146 | d->currTestName = "<h2>" + i18n("Levene Test for Equality of Variance") + "</h2>"; | ||
143 | d->performLeveneTest(categoricalVariable); | 147 | d->performLeveneTest(categoricalVariable); | ||
144 | | ||||
145 | emit changed(); | 148 | emit changed(); | ||
146 | } | 149 | } | ||
147 | 150 | | |||
148 | double HypothesisTest::statisticValue() { | 151 | QList<double> HypothesisTest::statisticValue() { | ||
149 | return d->statisticValue; | 152 | return d->statisticValue; | ||
150 | } | 153 | } | ||
151 | 154 | | |||
152 | double HypothesisTest::pValue() { | 155 | QList<double> HypothesisTest::pValue() { | ||
153 | return d->pValue; | 156 | return d->pValue; | ||
154 | } | 157 | } | ||
155 | 158 | | |||
Context not available. | |||||
203 | /**************************Two Sample Independent *************************************/ | 206 | /**************************Two Sample Independent *************************************/ | ||
204 | 207 | | |||
205 | void HypothesisTestPrivate::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) { | 208 | void HypothesisTestPrivate::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) { | ||
206 | clearTestView(); | | |||
207 | | ||||
208 | if (columns.size() != 2) { | 209 | if (columns.size() != 2) { | ||
209 | printError("Inappropriate number of columns selected"); | 210 | printError("Inappropriate number of columns selected"); | ||
210 | return; | 211 | return; | ||
Context not available. | |||||
300 | 301 | | |||
301 | sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) + | 302 | sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) + | ||
302 | (n[1]-1) * gsl_pow_2(std[1]) ) / df ); | 303 | (n[1]-1) * gsl_pow_2(std[1]) ) / df ); | ||
303 | statisticValue = (mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1])); | 304 | statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1]))); | ||
sgerlach: I would suggest using sqrt() from cmath instead of qSqrt(). What do you think? | |||||
devanshuagarwal: Yes you are right. | |||||
304 | printLine(9, "<b>Assumption:</b> Equal Variance b/w both population means"); | 305 | printLine(9, "<b>Assumption:</b> Equal Variance b/w both population means"); | ||
305 | } else { | 306 | } else { | ||
306 | double temp_val; | 307 | double temp_val; | ||
Context not available. | |||||
309 | (gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1))); | 310 | (gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1))); | ||
310 | df = qRound(temp_val); | 311 | df = qRound(temp_val); | ||
311 | 312 | | |||
312 | statisticValue = (mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) + | 313 | statisticValue.append((mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) + | ||
313 | (gsl_pow_2(std[1])/n[1]))); | 314 | (gsl_pow_2(std[1])/n[1])))); | ||
314 | printLine(9, "<b>Assumption:</b> UnEqual Variance b/w both population means"); | 315 | printLine(9, "<b>Assumption:</b> UnEqual Variance b/w both population means"); | ||
315 | } | 316 | } | ||
316 | 317 | | |||
Context not available. | |||||
320 | case HypothesisTest::Test::Type::ZTest: { | 321 | case HypothesisTest::Test::Type::ZTest: { | ||
321 | testName = "Z"; | 322 | testName = "Z"; | ||
322 | sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df); | 323 | sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df); | ||
323 | statisticValue = (mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1])); | 324 | statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1]))); | ||
324 | pValue = gsl_cdf_gaussian_P(statisticValue, sp); | 325 | // pValue.append(gsl_cdf_gaussian_P(statisticValue, sp)); | ||
325 | break; | 326 | break; | ||
326 | } | 327 | } | ||
327 | case HypothesisTest::Test::Type::Anova: | 328 | case HypothesisTest::Test::Type::Anova: | ||
Context not available. | |||||
330 | } | 331 | } | ||
331 | 332 | | |||
332 | currTestName = "<h2>" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "</h2>"; | 333 | currTestName = "<h2>" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "</h2>"; | ||
333 | pValue = getPValue(test, statisticValue, col1Name, col2Name, (mean[0] - mean[1]), sp, df); | 334 | pValue.append(getPValue(test, statisticValue[0], col1Name, col2Name, (mean[0] - mean[1]), sp, df)); | ||
334 | 335 | | |||
335 | printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); | 336 | printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); | ||
336 | 337 | | |||
337 | printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green"); | 338 | printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green"); | ||
338 | printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName)); | 339 | printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName)); | ||
339 | 340 | | |||
340 | printLine(5, i18n("P Value is %1 ", pValue), "green"); | 341 | printLine(5, i18n("P Value is %1 ", pValue[0]), "green"); | ||
341 | 342 | | |||
342 | printLine(6, i18n("Degree of Freedom is %1", df), "green"); | 343 | printLine(6, i18n("Degree of Freedom is %1", df), "green"); | ||
343 | printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate")); | 344 | printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate")); | ||
344 | 345 | | |||
345 | if (pValue <= significanceLevel) | 346 | if (pValue[0] <= significanceLevel) | ||
346 | printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(significanceLevel))); | 347 | printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(significanceLevel))); | ||
347 | else | 348 | else | ||
348 | printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); | 349 | printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); | ||
Context not available. | |||||
352 | /********************************Two Sample Paired ***************************************/ | 353 | /********************************Two Sample Paired ***************************************/ | ||
353 | 354 | | |||
354 | void HypothesisTestPrivate::performTwoSamplePairedTest(HypothesisTest::Test::Type test) { | 355 | void HypothesisTestPrivate::performTwoSamplePairedTest(HypothesisTest::Test::Type test) { | ||
355 | clearTestView(); | | |||
356 | | ||||
357 | if (columns.size() != 2) { | 356 | if (columns.size() != 2) { | ||
358 | printError("Inappropriate number of columns selected"); | 357 | printError("Inappropriate number of columns selected"); | ||
359 | 358 | | |||
Context not available. | |||||
403 | 402 | | |||
404 | switch (test) { | 403 | switch (test) { | ||
405 | case HypothesisTest::Test::Type::TTest: { | 404 | case HypothesisTest::Test::Type::TTest: { | ||
406 | statisticValue = mean / (std / qSqrt(n)); | 405 | statisticValue[0] = mean / (std / qSqrt(n)); | ||
407 | df = n - 1; | 406 | df = n - 1; | ||
408 | testName = "T"; | 407 | testName = "T"; | ||
409 | printLine(6, i18n("Degree of Freedom is %1</p", df), "green"); | 408 | printLine(6, i18n("Degree of Freedom is %1</p", df), "green"); | ||
Context not available. | |||||
411 | } | 410 | } | ||
412 | case HypothesisTest::Test::Type::ZTest: { | 411 | case HypothesisTest::Test::Type::ZTest: { | ||
413 | testName = "Z"; | 412 | testName = "Z"; | ||
414 | statisticValue = mean / (std / qSqrt(n)); | 413 | statisticValue[0] = mean / (std / qSqrt(n)); | ||
415 | df = n - 1; | 414 | df = n - 1; | ||
416 | break; | 415 | break; | ||
417 | } | 416 | } | ||
Context not available. | |||||
422 | 421 | | |||
423 | } | 422 | } | ||
424 | 423 | | |||
425 | pValue = getPValue(test, statisticValue, columns[0]->name(), i18n("%1", populationMean), mean, std, df); | 424 | pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1", populationMean), mean, std, df)); | ||
426 | currTestName = "<h2>" + i18n("One Sample %1 Test for %2 vs %3", testName, columns[0]->name(), columns[1]->name()) + "</h2>"; | 425 | currTestName = "<h2>" + i18n("One Sample %1 Test for %2 vs %3", testName, columns[0]->name(), columns[1]->name()) + "</h2>"; | ||
427 | 426 | | |||
428 | printLine(2, i18n("Significance level is %1 ", round(significanceLevel)), "blue"); | 427 | printLine(2, i18n("Significance level is %1 ", round(significanceLevel)), "blue"); | ||
429 | printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green"); | 428 | printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green"); | ||
430 | printLine(5, i18n("P Value is %1 ", pValue), "green"); | 429 | printLine(5, i18n("P Value is %1 ", pValue[0]), "green"); | ||
431 | 430 | | |||
432 | if (pValue <= significanceLevel) | 431 | if (pValue[0] <= significanceLevel) | ||
433 | printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); | 432 | printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); | ||
434 | else | 433 | else | ||
435 | printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); | 434 | printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); | ||
Context not available. | |||||
440 | /******************************** One Sample ***************************************/ | 439 | /******************************** One Sample ***************************************/ | ||
441 | 440 | | |||
442 | void HypothesisTestPrivate::performOneSampleTest(HypothesisTest::Test::Type test) { | 441 | void HypothesisTestPrivate::performOneSampleTest(HypothesisTest::Test::Type test) { | ||
443 | clearTestView(); | | |||
444 | | ||||
445 | if (columns.size() != 1) { | 442 | if (columns.size() != 1) { | ||
446 | printError("Inappropriate number of columns selected"); | 443 | printError("Inappropriate number of columns selected"); | ||
447 | 444 | | |||
Context not available. | |||||
488 | switch (test) { | 485 | switch (test) { | ||
489 | case HypothesisTest::Test::Type::TTest: { | 486 | case HypothesisTest::Test::Type::TTest: { | ||
490 | testName = "T"; | 487 | testName = "T"; | ||
491 | statisticValue = (mean - populationMean) / (std / qSqrt(n)); | 488 | statisticValue.append((mean - populationMean) / (std / qSqrt(n))); | ||
492 | df = n - 1; | 489 | df = n - 1; | ||
493 | printLine(6, i18n("Degree of Freedom is %1", df), "blue"); | 490 | printLine(6, i18n("Degree of Freedom is %1", df), "blue"); | ||
494 | break; | 491 | break; | ||
Context not available. | |||||
496 | case HypothesisTest::Test::Type::ZTest: { | 493 | case HypothesisTest::Test::Type::ZTest: { | ||
497 | testName = "Z"; | 494 | testName = "Z"; | ||
498 | df = 0; | 495 | df = 0; | ||
499 | statisticValue = (mean - populationMean) / (std / qSqrt(n)); | 496 | statisticValue.append((mean - populationMean) / (std / qSqrt(n))); | ||
500 | break; | 497 | break; | ||
501 | } | 498 | } | ||
502 | case HypothesisTest::Test::Type::Anova: | 499 | case HypothesisTest::Test::Type::Anova: | ||
Context not available. | |||||
504 | break; | 501 | break; | ||
505 | } | 502 | } | ||
506 | 503 | | |||
507 | pValue = getPValue(test, statisticValue, columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df); | 504 | pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df)); | ||
508 | currTestName = "<h2>" + i18n("One Sample %1 Test for %2", testName, columns[0]->name()) + "</h2>"; | 505 | currTestName = "<h2>" + i18n("One Sample %1 Test for %2", testName, columns[0]->name()) + "</h2>"; | ||
509 | 506 | | |||
510 | printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); | 507 | printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); | ||
511 | printLine(4, i18n("%1 Value is %2", testName, round(statisticValue)), "green"); | 508 | printLine(4, i18n("%1 Value is %2", testName, round(statisticValue[0])), "green"); | ||
512 | printLine(5, i18n("P Value is %1", pValue), "green"); | 509 | printLine(5, i18n("P Value is %1", pValue[0]), "green"); | ||
513 | 510 | | |||
514 | if (pValue <= significanceLevel) | 511 | if (pValue[0] <= significanceLevel) | ||
515 | printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); | 512 | printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); | ||
516 | else | 513 | else | ||
517 | printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); | 514 | printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); | ||
Context not available. | |||||
526 | // w stands for within groups | 523 | // w stands for within groups | ||
527 | // np is number of partition i.e., number of classes | 524 | // np is number of partition i.e., number of classes | ||
528 | void HypothesisTestPrivate::performOneWayAnova() { | 525 | void HypothesisTestPrivate::performOneWayAnova() { | ||
529 | clearTestView(); | 526 | int np, totalRows; | ||
530 | int np, totalRows; | | |||
531 | countPartitions(columns[0], np, totalRows); | 527 | countPartitions(columns[0], np, totalRows); | ||
532 | 528 | | |||
533 | int* ni = new int[np]; | 529 | int* ni = new int[np]; | ||
Context not available. | |||||
551 | double sW = 0; // sum of squares of (value - mean of group) within the groups | 547 | double sW = 0; // sum of squares of (value - mean of group) within the groups | ||
552 | int fW = 0; // degree of freedom within the group | 548 | int fW = 0; // degree of freedom within the group | ||
553 | double msW = 0; // mean sum of squares within the groups | 549 | double msW = 0; // mean sum of squares within the groups | ||
554 | double fValue = 0; | | |||
555 | | ||||
556 | 550 | | |||
557 | // now finding mean of each group; | 551 | // now finding mean of each group; | ||
558 | 552 | | |||
Context not available. | |||||
573 | msB = sB / fB; | 567 | msB = sB / fB; | ||
574 | 568 | | |||
575 | msW = sW / fW; | 569 | msW = sW / fW; | ||
576 | fValue = msB / msW; | 570 | statisticValue.append(msB / msW); | ||
577 | 571 | | |||
578 | 572 | | |||
579 | pValue = nsl_stats_fdist_p(fValue, static_cast<size_t>(np-1), fW); | 573 | pValue.append(nsl_stats_fdist_p(statisticValue[0], static_cast<size_t>(np-1), fW)); | ||
580 | 574 | | |||
581 | QMapIterator<QString, int> i(classnameToIndex); | 575 | QMapIterator<QString, int> i(classnameToIndex); | ||
582 | while (i.hasNext()) { | 576 | while (i.hasNext()) { | ||
Context not available. | |||||
643 | delete[] std; | 637 | delete[] std; | ||
644 | delete[] colNames; | 638 | delete[] colNames; | ||
645 | 639 | | |||
646 | printLine(1, i18n("F Value is %1", round(fValue)), "green"); | 640 | printLine(1, i18n("F Value is %1", round(statisticValue[0])), "green"); | ||
647 | printLine(2, i18n("P Value is %1 ", pValue), "green"); | 641 | printLine(2, i18n("P Value is %1 ", pValue[0]), "green"); | ||
648 | 642 | | |||
649 | if (pValue <= significanceLevel) | 643 | if (pValue[0] <= significanceLevel) | ||
650 | printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); | 644 | printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); | ||
651 | else | 645 | else | ||
652 | printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true")); | 646 | printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true")); | ||
Context not available. | |||||
659 | // all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf | 653 | // all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf | ||
660 | 654 | | |||
661 | //TODO: suppress warning of variable length array are a C99 feature. | 655 | //TODO: suppress warning of variable length array are a C99 feature. | ||
662 | //TODO: changed int mean to double mean; | 656 | //TODO: add assumptions verification option | ||
657 | //TODO: add tail option (if needed) | ||||
663 | void HypothesisTestPrivate::performTwoWayAnova() { | 658 | void HypothesisTestPrivate::performTwoWayAnova() { | ||
664 | clearTestView(); | | |||
665 | int np_a, totalRows_a; | 659 | int np_a, totalRows_a; | ||
666 | int np_b, totalRows_b; | 660 | int np_b, totalRows_b; | ||
667 | countPartitions(columns[0], np_a, totalRows_a); | 661 | countPartitions(columns[0], np_a, totalRows_a); | ||
Context not available. | |||||
720 | groupMean[i][j] /= replicates[i][j]; | 714 | groupMean[i][j] /= replicates[i][j]; | ||
721 | } | 715 | } | ||
722 | 716 | | |||
723 | for (int i = 0; i < np_a; i++) | 717 | // for (int i = 0; i < np_a; i++) | ||
724 | for (int j = 0; j < np_b; j++) | 718 | // for (int j = 0; j < np_b; j++) | ||
725 | groupMean[i][j] = int(groupMean[i][j]); | 719 | // groupMean[i][j] = int(groupMean[i][j]); | ||
726 | 720 | | |||
727 | double ss_within = 0; | 721 | double ss_within = 0; | ||
728 | for (int i = 0; i < totalRows_a; i++) { | 722 | for (int i = 0; i < totalRows_a; i++) { | ||
Context not available. | |||||
749 | for (int i = 0; i < np_a; i++) | 743 | for (int i = 0; i < np_a; i++) | ||
750 | mean += mean_a[i] / np_a; | 744 | mean += mean_a[i] / np_a; | ||
751 | 745 | | |||
752 | QDEBUG("ss_within is " << ss_within); | | |||
753 | QDEBUG("df_within is " << df_within); | | |||
754 | QDEBUG("ms_within is " << ms_within); | | |||
755 | 746 | | |||
747 | double ss_a = 0; | ||||
sgerlach: You are doing a lot of basic statistical calculations. Can you check whether any GSL function can be used (see https://www.gnu.org/software/gsl/doc/html/statistics.html)? | |||||
devanshuagarwal: The reason I am doing so many basic calculations by hand is performance. If I use library functions, I have to call a separate function for the mean, the standard deviation and each of the other statistics. Asymptotically that is still O(n), but in practice it will be slower: here I traverse the whole data set only twice, whereas with separate functions it would be traversed f times (where f >= the number of statistical calculations). | |||||
sgerlach: I'm not sure I understand your argument. You don't have to calculate anything f times if you save the values. Performance should also not be a problem here. I would really recommend using GSL functions to improve readability and maintainability, but of course only if that fits the workflow. Just think about it. | |||||
devanshuagarwal: Yes, you are right. It will improve readability and maintainability. I will use GSL functions. | |||||
756 | for (int i = 0; i < np_a; i++) | 748 | for (int i = 0; i < np_a; i++) | ||
757 | QDEBUG("mean_a is " << mean_a[i]); | 749 | ss_a += gsl_pow_2(mean_a[i] - mean); | ||
750 | ss_a *= replicate * np_b; | ||||
751 | | ||||
752 | int df_a = np_a - 1; | ||||
753 | double ms_a = ss_a / df_a; | ||||
754 | | ||||
755 | double ss_b = 0; | ||||
758 | for (int i = 0; i < np_b; i++) | 756 | for (int i = 0; i < np_b; i++) | ||
759 | QDEBUG("mean_b is " << mean_b[i]); | 757 | ss_b += gsl_pow_2(mean_b[i] - mean); | ||
758 | ss_b *= replicate * np_a; | ||||
759 | | ||||
760 | int df_b = np_b - 1; | ||||
761 | double ms_b = ss_b / df_b; | ||||
762 | | ||||
763 | double ss_interaction = 0; | ||||
764 | | ||||
765 | for (int i = 0; i < np_a; i++) | ||||
766 | for (int j = 0; j < np_b; j++) | ||||
767 | ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean); | ||||
760 | 768 | | |||
769 | ss_interaction *= replicate; | ||||
770 | int df_interaction = (np_a - 1) * (np_b - 1); | ||||
771 | double ms_interaction = ss_interaction / df_interaction; | ||||
761 | 772 | | |||
762 | QString partitionNames_a[np_a]; | 773 | QString partitionNames_a[np_a]; | ||
763 | QString partitionNames_b[np_b]; | 774 | QString partitionNames_b[np_b]; | ||
Context not available. | |||||
791 | for (int i = 0; i < np_a; i++) { | 802 | for (int i = 0; i < np_a; i++) { | ||
792 | rowMajor.append(new Cell(partitionNames_a[i], level, true)); | 803 | rowMajor.append(new Cell(partitionNames_a[i], level, true)); | ||
793 | for (int j = 0; j < np_b; j++) { | 804 | for (int j = 0; j < np_b; j++) { | ||
794 | rowMajor.append(new Cell(groupMean[i][j], level)); | 805 | rowMajor.append(new Cell(round(groupMean[i][j]), level)); | ||
795 | rowMajor.append(new Cell(replicates[i][j], level)); | 806 | rowMajor.append(new Cell(replicates[i][j], level)); | ||
796 | } | 807 | } | ||
797 | rowMajor.append(new Cell(mean_a[i], level)); | 808 | rowMajor.append(new Cell(round(mean_a[i]), level)); | ||
798 | level++; | 809 | level++; | ||
799 | } | 810 | } | ||
800 | 811 | | |||
801 | rowMajor.append(new Cell("Mean", level, true)); | 812 | rowMajor.append(new Cell("Mean", level, true)); | ||
802 | for (int i = 0; i < np_b; i++) | 813 | for (int i = 0; i < np_b; i++) | ||
803 | rowMajor.append(new Cell(mean_b[i], level, false, 1, 2)); | 814 | rowMajor.append(new Cell(round(mean_b[i]), level, false, 1, 2)); | ||
804 | rowMajor.append(new Cell(mean, level)); | 815 | rowMajor.append(new Cell(round(mean), level)); | ||
805 | 816 | | |||
806 | statsTable = "<h3>" + i18n("Contingency Table") + "</h3>"; | 817 | statsTable = "<h3>" + i18n("Contingency Table") + "</h3>"; | ||
807 | statsTable += getHtmlTable3(rowMajor); | 818 | statsTable += getHtmlTable3(rowMajor); | ||
808 | 819 | | |||
809 | // QDEBUG(""); | 820 | statsTable += "</br>"; | ||
810 | // QDEBUG(""); | 821 | statsTable += "<h3>" + i18n("results table") + "</h3>"; | ||
811 | // QDEBUG(statsTable); | 822 | | ||
823 | rowMajor.clear(); | ||||
824 | level = 0; | ||||
825 | rowMajor.append(new Cell("", level, true)); | ||||
826 | rowMajor.append(new Cell("SS", level, true)); | ||||
827 | rowMajor.append(new Cell("DF", level, true)); | ||||
828 | rowMajor.append(new Cell("MS", level, true)); | ||||
829 | | ||||
830 | level++; | ||||
831 | rowMajor.append(new Cell(columns[0]->name(), level, true)); | ||||
832 | rowMajor.append(new Cell(round(ss_a), level)); | ||||
833 | rowMajor.append(new Cell(df_a, level)); | ||||
834 | rowMajor.append(new Cell(round(ms_a), level)); | ||||
835 | | ||||
836 | level++; | ||||
837 | rowMajor.append(new Cell(columns[1]->name(), level, true)); | ||||
838 | rowMajor.append(new Cell(round(ss_b), level)); | ||||
839 | rowMajor.append(new Cell(df_b, level)); | ||||
840 | rowMajor.append(new Cell(round(ms_b), level)); | ||||
841 | | ||||
842 | level++; | ||||
843 | rowMajor.append(new Cell("Interaction", level, true)); | ||||
sgerlach: translation? | |||||
devanshuagarwal: the translation is automatic in getHtmlTable3 function | |||||
sgerlach: ok | |||||
844 | rowMajor.append(new Cell(round(ss_interaction), level)); | ||||
845 | rowMajor.append(new Cell(df_interaction, level)); | ||||
846 | rowMajor.append(new Cell(round(ms_interaction), level)); | ||||
847 | | ||||
848 | level++; | ||||
849 | rowMajor.append(new Cell("Within", level, true)); | ||||
sgerlach: translation? | |||||
devanshuagarwal: It is there in getHtmlTable3 method. | |||||
sgerlach: ok | |||||
850 | rowMajor.append(new Cell(round(ss_within), level)); | ||||
851 | rowMajor.append(new Cell(df_within, level)); | ||||
852 | rowMajor.append(new Cell(round(ms_within), level)); | ||||
853 | | ||||
854 | statsTable += getHtmlTable3(rowMajor); | ||||
855 | | ||||
856 | double fValue_a = ms_a / ms_within; | ||||
857 | double fValue_b = ms_b / ms_within; | ||||
858 | double fValue_interaction = ms_interaction / ms_within; | ||||
859 | | ||||
860 | double pValue_a = nsl_stats_fdist_p(fValue_a, static_cast<size_t>(np_a - 1), df_a); | ||||
861 | double pValue_b = nsl_stats_fdist_p(fValue_b, static_cast<size_t>(np_b - 1), df_b); | ||||
862 | | ||||
863 | printLine(0, "F(df<sub>" + columns[0]->name() + "</sub>, df<sub>within</sub>) is " + round(fValue_a), "blue"); | ||||
864 | printLine(1, "F(df<sub>" + columns[1]->name() + "</sub>, df<sub>within</sub>) is " + round(fValue_b), "blue"); | ||||
865 | printLine(2, "F(df<sub>interaction</sub>, df<sub>within</sub>) is " + round(fValue_interaction), "blue"); | ||||
866 | | ||||
867 | printLine(4, "P(df<sub>" + columns[0]->name() + "</sub>, df<sub>within</sub>) is " + round(pValue_a), "blue"); | ||||
868 | printLine(5, "P(df<sub>" + columns[1]->name() + "</sub>, df<sub>within</sub>) is " + round(pValue_b), "blue"); | ||||
869 | // printLine(2, "P(df<sub>interaction</sub>, df<sub>within</sub>) is " + round(fValue_interaction), "blue"); | ||||
870 | | ||||
871 | statisticValue.append(fValue_a); | ||||
872 | statisticValue.append(fValue_b); | ||||
873 | statisticValue.append(fValue_interaction); | ||||
874 | | ||||
875 | pValue.append(pValue_a); | ||||
876 | pValue.append(pValue_b); | ||||
877 | | ||||
812 | return; | 878 | return; | ||
813 | } | 879 | } | ||
814 | 880 | | |||
Context not available. | |||||
825 | // ziBarBar = mean for all zij | 891 | // ziBarBar = mean for all zij | ||
826 | // ni = number of elements in group i | 892 | // ni = number of elements in group i | ||
827 | void HypothesisTestPrivate::performLeveneTest(bool categoricalVariable) { | 893 | void HypothesisTestPrivate::performLeveneTest(bool categoricalVariable) { | ||
828 | clearTestView(); | | |||
829 | | ||||
830 | if (columns.size() != 2) { | 894 | if (columns.size() != 2) { | ||
831 | printError("Inappropriate number of columns selected"); | 895 | printError("Inappropriate number of columns selected"); | ||
832 | return; | 896 | return; | ||
Context not available. | |||||
1045 | delete[] ziBar; | 1109 | delete[] ziBar; | ||
1046 | delete[] ni; | 1110 | delete[] ni; | ||
1047 | 1111 | | |||
1048 | pValue = nsl_stats_fdist_p(fValue, static_cast<size_t>(np-1), df); | 1112 | pValue.append(nsl_stats_fdist_p(fValue, static_cast<size_t>(np-1), df)); | ||
1049 | 1113 | | |||
1050 | printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue"); | 1114 | printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue"); | ||
1051 | printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue"); | 1115 | printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue"); | ||
1052 | printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); | 1116 | printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); | ||
1053 | printLine(4, i18n("F Value is %1 ", round(fValue)), "green"); | 1117 | printLine(4, i18n("F Value is %1 ", round(fValue)), "green"); | ||
1054 | printLine(5, i18n("P Value is %1 ", pValue), "green"); | 1118 | printLine(5, i18n("P Value is %1 ", pValue[0]), "green"); | ||
1055 | printLine(6, i18n("Degree of Freedom is %1", df), "green"); | 1119 | printLine(6, i18n("Degree of Freedom is %1", df), "green"); | ||
1056 | 1120 | | |||
1057 | if (pValue <= significanceLevel) { | 1121 | if (pValue[0] <= significanceLevel) { | ||
1058 | printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); | 1122 | printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); | ||
1059 | printLine(8, "Requirement for homogeneity is not met", "red"); | 1123 | printLine(8, "Requirement for homogeneity is not met", "red"); | ||
1060 | } else { | 1124 | } else { | ||
Context not available. | |||||
1062 | printLine(8, "Requirement for homogeneity is met", "green"); | 1126 | printLine(8, "Requirement for homogeneity is met", "green"); | ||
1063 | } | 1127 | } | ||
1064 | 1128 | | |||
1129 | statisticValue.append(fValue); | ||||
1065 | return; | 1130 | return; | ||
1066 | } | 1131 | } | ||
1067 | 1132 | | |||
Context not available. | |||||
1247 | 1312 | | |||
1248 | //TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol; | 1313 | //TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol; | ||
1249 | // TODO: check for correctness between: for TestZ with TailTwo | 1314 | // TODO: check for correctness between: for TestZ with TailTwo | ||
1250 | // pValue = 2*gsl_cdf_tdist_P(value, df) v/s | 1315 | // pValue.append(2*gsl_cdf_tdist_P(value, df) v/s | ||
1251 | // pValue = gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df); | 1316 | // pValue.append(gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df); | ||
1252 | double HypothesisTestPrivate::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) { | 1317 | double HypothesisTestPrivate::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) { | ||
1253 | 1318 | | |||
1254 | switch (test) { | 1319 | switch (test) { | ||
1255 | case HypothesisTest::Test::Type::TTest: { | 1320 | case HypothesisTest::Test::Type::TTest: { | ||
1256 | switch (tailType) { | 1321 | switch (tailType) { | ||
1257 | case HypothesisTest::Test::Tail::Negative: { | 1322 | case HypothesisTest::Test::Tail::Negative: { | ||
1258 | pValue = gsl_cdf_tdist_P(value, df); | 1323 | pValue.append(gsl_cdf_tdist_P(value, df)); | ||
1259 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); | 1324 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); | ||
1260 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); | 1325 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); | ||
1261 | break; | 1326 | break; | ||
1262 | } | 1327 | } | ||
1263 | case HypothesisTest::Test::Tail::Positive: { | 1328 | case HypothesisTest::Test::Tail::Positive: { | ||
1264 | value *= -1; | 1329 | value *= -1; | ||
1265 | pValue = gsl_cdf_tdist_P(value, df); | 1330 | pValue.append(gsl_cdf_tdist_P(value, df)); | ||
1266 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); | 1331 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); | ||
1267 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); | 1332 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); | ||
1268 | break; | 1333 | break; | ||
1269 | } | 1334 | } | ||
1270 | case HypothesisTest::Test::Tail::Two: { | 1335 | case HypothesisTest::Test::Tail::Two: { | ||
1271 | pValue = 2.*gsl_cdf_tdist_P(-fabs(value), df); | 1336 | pValue.append(2.*gsl_cdf_tdist_P(-fabs(value), df)); | ||
1272 | 1337 | | |||
1273 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue"); | 1338 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue"); | ||
1274 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); | 1339 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); | ||
Context not available. | |||||
1280 | case HypothesisTest::Test::Type::ZTest: { | 1345 | case HypothesisTest::Test::Type::ZTest: { | ||
1281 | switch (tailType) { | 1346 | switch (tailType) { | ||
1282 | case HypothesisTest::Test::Tail::Negative: { | 1347 | case HypothesisTest::Test::Tail::Negative: { | ||
1283 | pValue = gsl_cdf_gaussian_P(value - mean, sp); | 1348 | pValue.append(gsl_cdf_gaussian_P(value - mean, sp)); | ||
1284 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); | 1349 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); | ||
1285 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); | 1350 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); | ||
1286 | break; | 1351 | break; | ||
1287 | } | 1352 | } | ||
1288 | case HypothesisTest::Test::Tail::Positive: { | 1353 | case HypothesisTest::Test::Tail::Positive: { | ||
1289 | value *= -1; | 1354 | value *= -1; | ||
1290 | pValue = nsl_stats_tdist_p(value - mean, sp); | 1355 | pValue.append(nsl_stats_tdist_p(value - mean, sp)); | ||
1291 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); | 1356 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); | ||
1292 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); | 1357 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); | ||
1293 | break; | 1358 | break; | ||
1294 | } | 1359 | } | ||
1295 | case HypothesisTest::Test::Tail::Two: { | 1360 | case HypothesisTest::Test::Tail::Two: { | ||
1296 | pValue = 2.*gsl_cdf_gaussian_P(value - mean, sp); | 1361 | pValue.append(2.*gsl_cdf_gaussian_P(value - mean, sp)); | ||
1297 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue"); | 1362 | printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue"); | ||
1298 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); | 1363 | printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); | ||
1299 | break; | 1364 | break; | ||
Context not available. | |||||
1306 | break; | 1371 | break; | ||
1307 | } | 1372 | } | ||
1308 | 1373 | | |||
1309 | if (pValue > 1) | 1374 | if (pValue[0] > 1) | ||
1310 | return 1; | 1375 | return 1; | ||
sgerlach: check indentation | |||||
It is indented in the source file. I don't know why it is not shown in the difference file. devanshuagarwal: It is indented in the source file. I don't know why it is not shown in the difference file. | |||||
sgerlach: ok. This seems to happen sometimes. | |||||
1311 | return pValue; | 1376 | return pValue[0]; | ||
1312 | } | 1377 | } | ||
1313 | 1378 | | |||
1314 | int HypothesisTestPrivate::setSpanValues(HypothesisTestPrivate::Node* root, int& totalLevels) { | 1379 | int HypothesisTestPrivate::setSpanValues(HypothesisTestPrivate::Node* root, int& totalLevels) { | ||
Context not available. | |||||
1473 | ".tg {border-collapse:collapse;border: 1px solid black;}" | 1538 | ".tg {border-collapse:collapse;border: 1px solid black;}" | ||
1474 | ".tg td{font-family:Arial, sans-serif;font-size:14px;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#fff;}" | 1539 | ".tg td{font-family:Arial, sans-serif;font-size:14px;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#fff;}" | ||
1475 | ".tg th{font-family:Arial, sans-serif;font-size:14px;font-weight:normal;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#f0f0f0;}" | 1540 | ".tg th{font-family:Arial, sans-serif;font-size:14px;font-weight:normal;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#f0f0f0;}" | ||
1476 | "</style><table class=tg>"; | 1541 | "</style>"; | ||
1542 | | ||||
1543 | table += "<table class=tg>"; | ||||
1477 | 1544 | | |||
1478 | table += " <tr>"; | 1545 | table += " <tr>"; | ||
1479 | int prevLevel = 0; | 1546 | int prevLevel = 0; | ||
Context not available. | |||||
1501 | cellEndTag; | 1568 | cellEndTag; | ||
1502 | } | 1569 | } | ||
1503 | table += " <tr>"; | 1570 | table += " <tr>"; | ||
1571 | table += "</table>"; | ||||
1504 | return table; | 1572 | return table; | ||
1505 | } | 1573 | } | ||
1506 | 1574 | | |||
Context not available. | |||||
1527 | printLine(0, errorMsg, "red"); | 1595 | printLine(0, errorMsg, "red"); | ||
1528 | } | 1596 | } | ||
1529 | 1597 | | |||
1530 | void HypothesisTestPrivate::clearSummaryLayout() { | | |||
1531 | for (int i = 0; i < 10; i++) | | |||
1532 | resultLine[i]->clear(); | | |||
1533 | } | | |||
1534 | | ||||
1535 | void HypothesisTestPrivate::clearTestView() { | | |||
1536 | statsTable = ""; | | |||
1537 | clearSummaryLayout(); | | |||
1538 | } | | |||
1539 | | ||||
1540 | 1598 | | |||
1541 | /********************************************************************************** | 1599 | /********************************************************************************** | ||
1542 | * virtual functions implementations | 1600 | * virtual functions implementations | ||
Context not available. |
I would suggest using sqrt() from <cmath> instead of qSqrt(). What do you think?