diff --git a/src/backend/hypothesisTest/HypothesisTest.h b/src/backend/hypothesisTest/HypothesisTest.h --- a/src/backend/hypothesisTest/HypothesisTest.h +++ b/src/backend/hypothesisTest/HypothesisTest.h @@ -94,8 +94,8 @@ void performLeveneTest(bool categorical_variable); - double statisticValue(); - double pValue(); + QList statisticValue(); + QList pValue(); QVBoxLayout* summaryLayout(); diff --git a/src/backend/hypothesisTest/HypothesisTest.cpp b/src/backend/hypothesisTest/HypothesisTest.cpp --- a/src/backend/hypothesisTest/HypothesisTest.cpp +++ b/src/backend/hypothesisTest/HypothesisTest.cpp @@ -47,8 +47,6 @@ #include #include -#include - extern "C" { #include "backend/nsl/nsl_stats.h" } @@ -106,6 +104,12 @@ void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) { d->tailType = test.tail; + d->pValue.clear(); + d->statisticValue.clear(); + d->statsTable = ""; + for (int i = 0; i < 10; i++) + d->resultLine[i]->clear(); + switch (test.subtype) { case HypothesisTest::Test::SubType::TwoSampleIndependent: { d->currTestName = "

" + i18n("Two Sample Independent Test") + "

"; @@ -141,15 +145,14 @@ void HypothesisTest::performLeveneTest(bool categoricalVariable) { d->currTestName = "

" + i18n("Levene Test for Equality of Variance") + "

"; d->performLeveneTest(categoricalVariable); - emit changed(); } -double HypothesisTest::statisticValue() { +QList HypothesisTest::statisticValue() { return d->statisticValue; } -double HypothesisTest::pValue() { +QList HypothesisTest::pValue() { return d->pValue; } @@ -203,8 +206,6 @@ /**************************Two Sample Independent *************************************/ void HypothesisTestPrivate::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) { - clearTestView(); - if (columns.size() != 2) { printError("Inappropriate number of columns selected"); return; @@ -300,7 +301,7 @@ sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1]) ) / df ); - statisticValue = (mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1])); + statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1]))); printLine(9, "Assumption: Equal Variance b/w both population means"); } else { double temp_val; @@ -309,8 +310,8 @@ (gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1))); df = qRound(temp_val); - statisticValue = (mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) + - (gsl_pow_2(std[1])/n[1]))); + statisticValue.append((mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) + + (gsl_pow_2(std[1])/n[1])))); printLine(9, "Assumption: UnEqual Variance b/w both population means"); } @@ -320,8 +321,8 @@ case HypothesisTest::Test::Type::ZTest: { testName = "Z"; sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df); - statisticValue = (mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1])); - pValue = gsl_cdf_gaussian_P(statisticValue, sp); + statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1]))); +// pValue.append(gsl_cdf_gaussian_P(statisticValue, sp)); break; } case HypothesisTest::Test::Type::Anova: @@ -330,19 +331,19 @@ } currTestName = "

" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "

"; - pValue = getPValue(test, statisticValue, col1Name, col2Name, (mean[0] - mean[1]), sp, df); + pValue.append(getPValue(test, statisticValue[0], col1Name, col2Name, (mean[0] - mean[1]), sp, df)); printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); - printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green"); + printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green"); printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName)); - printLine(5, i18n("P Value is %1 ", pValue), "green"); + printLine(5, i18n("P Value is %1 ", pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate")); - if (pValue <= significanceLevel) + if (pValue[0] <= significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(significanceLevel))); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); @@ -352,8 +353,6 @@ /********************************Two Sample Paired ***************************************/ void HypothesisTestPrivate::performTwoSamplePairedTest(HypothesisTest::Test::Type test) { - clearTestView(); - if (columns.size() != 2) { printError("Inappropriate number of columns selected"); @@ -403,7 +402,7 @@ switch (test) { case HypothesisTest::Test::Type::TTest: { - statisticValue = mean / (std / qSqrt(n)); + statisticValue[0] = mean / (std / qSqrt(n)); df = n - 1; testName = "T"; printLine(6, i18n("Degree of Freedom is %1name(), i18n("%1", populationMean), mean, std, df); + pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1", populationMean), mean, std, df)); currTestName = "

" + i18n("One Sample %1 Test for %2 vs %3", testName, columns[0]->name(), columns[1]->name()) + "

"; printLine(2, i18n("Significance level is %1 ", round(significanceLevel)), "blue"); - printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green"); - printLine(5, i18n("P Value is %1 ", pValue), "green"); + printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green"); + printLine(5, i18n("P Value is %1 ", pValue[0]), "green"); - if (pValue <= significanceLevel) + if (pValue[0] <= significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); @@ -440,8 +439,6 @@ /******************************** One Sample ***************************************/ void HypothesisTestPrivate::performOneSampleTest(HypothesisTest::Test::Type test) { - clearTestView(); - if (columns.size() != 1) { printError("Inappropriate number of columns selected"); @@ -488,7 +485,7 @@ switch (test) { case HypothesisTest::Test::Type::TTest: { testName = "T"; - statisticValue = (mean - populationMean) / (std / qSqrt(n)); + statisticValue.append((mean - populationMean) / (std / qSqrt(n))); df = n - 1; printLine(6, i18n("Degree of Freedom is %1", df), "blue"); break; @@ -496,7 +493,7 @@ case HypothesisTest::Test::Type::ZTest: { testName = "Z"; df = 0; - statisticValue = (mean - populationMean) / (std / qSqrt(n)); + statisticValue.append((mean - populationMean) / (std / qSqrt(n))); break; } case HypothesisTest::Test::Type::Anova: @@ -504,14 +501,14 @@ break; } - pValue = getPValue(test, statisticValue, columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df); + pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df)); currTestName = "

" + i18n("One Sample %1 Test for %2", testName, columns[0]->name()) + "

"; printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); - printLine(4, i18n("%1 Value is %2", testName, round(statisticValue)), "green"); - printLine(5, i18n("P Value is %1", pValue), "green"); + printLine(4, i18n("%1 Value is %2", testName, round(statisticValue[0])), "green"); + printLine(5, i18n("P Value is %1", pValue[0]), "green"); - if (pValue <= significanceLevel) + if (pValue[0] <= significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); @@ -526,8 +523,7 @@ // w stands for within groups // np is number of partition i.e., number of classes void HypothesisTestPrivate::performOneWayAnova() { - clearTestView(); - int np, totalRows; + int np, totalRows; countPartitions(columns[0], np, totalRows); int* ni = new int[np]; @@ -551,8 +547,6 @@ double sW = 0; // sum of squares of (value - mean of group) within the groups int fW = 0; // degree of freedom within the group double msW = 0; // mean sum of squares within the groups - double fValue = 0; - // now finding mean of each group; @@ -573,10 +567,10 @@ msB = sB / fB; msW = sW / fW; - fValue = msB / msW; + statisticValue.append(msB / msW); - pValue = nsl_stats_fdist_p(fValue, static_cast(np-1), fW); + pValue.append(nsl_stats_fdist_p(statisticValue[0], static_cast(np-1), fW)); QMapIterator i(classnameToIndex); while (i.hasNext()) { @@ -643,10 +637,10 @@ delete[] std; delete[] colNames; - printLine(1, i18n("F Value is %1", round(fValue)), "green"); - printLine(2, i18n("P Value is %1 ", pValue), "green"); + printLine(1, i18n("F Value is %1", round(statisticValue[0])), "green"); + printLine(2, i18n("P Value is %1 ", pValue[0]), "green"); - if (pValue <= significanceLevel) + if (pValue[0] <= significanceLevel) printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); else printTooltip(2, 
i18n("There is a plausibility for Null Hypothesis to be true")); @@ -659,9 +653,9 @@ // all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf //TODO: suppress warning of variable length array are a C99 feature. -//TODO: changed int mean to double mean; +//TODO: add assumptions verification option +//TODO: add tail option (if needed) void HypothesisTestPrivate::performTwoWayAnova() { - clearTestView(); int np_a, totalRows_a; int np_b, totalRows_b; countPartitions(columns[0], np_a, totalRows_a); @@ -720,9 +714,9 @@ groupMean[i][j] /= replicates[i][j]; } - for (int i = 0; i < np_a; i++) - for (int j = 0; j < np_b; j++) - groupMean[i][j] = int(groupMean[i][j]); +// for (int i = 0; i < np_a; i++) +// for (int j = 0; j < np_b; j++) +// groupMean[i][j] = int(groupMean[i][j]); double ss_within = 0; for (int i = 0; i < totalRows_a; i++) { @@ -749,15 +743,32 @@ for (int i = 0; i < np_a; i++) mean += mean_a[i] / np_a; - QDEBUG("ss_within is " << ss_within); - QDEBUG("df_within is " << df_within); - QDEBUG("ms_within is " << ms_within); + double ss_a = 0; for (int i = 0; i < np_a; i++) - QDEBUG("mean_a is " << mean_a[i]); + ss_a += gsl_pow_2(mean_a[i] - mean); + ss_a *= replicate * np_b; + + int df_a = np_a - 1; + double ms_a = ss_a / df_a; + + double ss_b = 0; for (int i = 0; i < np_b; i++) - QDEBUG("mean_b is " << mean_b[i]); + ss_b += gsl_pow_2(mean_b[i] - mean); + ss_b *= replicate * np_a; + + int df_b = np_b - 1; + double ms_b = ss_b / df_b; + + double ss_interaction = 0; + + for (int i = 0; i < np_a; i++) + for (int j = 0; j < np_b; j++) + ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean); + ss_interaction *= replicate; + int df_interaction = (np_a - 1) * (np_b - 1); + double ms_interaction = ss_interaction / df_interaction; QString partitionNames_a[np_a]; QString partitionNames_b[np_b]; @@ -791,24 +802,79 @@ for (int i = 0; i < np_a; i++) { rowMajor.append(new Cell(partitionNames_a[i], level, 
true)); for (int j = 0; j < np_b; j++) { - rowMajor.append(new Cell(groupMean[i][j], level)); + rowMajor.append(new Cell(round(groupMean[i][j]), level)); rowMajor.append(new Cell(replicates[i][j], level)); } - rowMajor.append(new Cell(mean_a[i], level)); + rowMajor.append(new Cell(round(mean_a[i]), level)); level++; } rowMajor.append(new Cell("Mean", level, true)); for (int i = 0; i < np_b; i++) - rowMajor.append(new Cell(mean_b[i], level, false, 1, 2)); - rowMajor.append(new Cell(mean, level)); + rowMajor.append(new Cell(round(mean_b[i]), level, false, 1, 2)); + rowMajor.append(new Cell(round(mean), level)); statsTable = "

" + i18n("Contingency Table") + "

"; statsTable += getHtmlTable3(rowMajor); -// QDEBUG(""); -// QDEBUG(""); -// QDEBUG(statsTable); + statsTable += "
"; + statsTable += "

" + i18n("results table") + "

"; + + rowMajor.clear(); + level = 0; + rowMajor.append(new Cell("", level, true)); + rowMajor.append(new Cell("SS", level, true)); + rowMajor.append(new Cell("DF", level, true)); + rowMajor.append(new Cell("MS", level, true)); + + level++; + rowMajor.append(new Cell(columns[0]->name(), level, true)); + rowMajor.append(new Cell(round(ss_a), level)); + rowMajor.append(new Cell(df_a, level)); + rowMajor.append(new Cell(round(ms_a), level)); + + level++; + rowMajor.append(new Cell(columns[1]->name(), level, true)); + rowMajor.append(new Cell(round(ss_b), level)); + rowMajor.append(new Cell(df_b, level)); + rowMajor.append(new Cell(round(ms_b), level)); + + level++; + rowMajor.append(new Cell("Interaction", level, true)); + rowMajor.append(new Cell(round(ss_interaction), level)); + rowMajor.append(new Cell(df_interaction, level)); + rowMajor.append(new Cell(round(ms_interaction), level)); + + level++; + rowMajor.append(new Cell("Within", level, true)); + rowMajor.append(new Cell(round(ss_within), level)); + rowMajor.append(new Cell(df_within, level)); + rowMajor.append(new Cell(round(ms_within), level)); + + statsTable += getHtmlTable3(rowMajor); + + double fValue_a = ms_a / ms_within; + double fValue_b = ms_b / ms_within; + double fValue_interaction = ms_interaction / ms_within; + + double pValue_a = nsl_stats_fdist_p(fValue_a, static_cast(np_a - 1), df_a); + double pValue_b = nsl_stats_fdist_p(fValue_b, static_cast(np_b - 1), df_b); + + printLine(0, "F(df" + columns[0]->name() + ", dfwithin) is " + round(fValue_a), "blue"); + printLine(1, "F(df" + columns[1]->name() + ", dfwithin) is " + round(fValue_b), "blue"); + printLine(2, "F(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); + + printLine(4, "P(df" + columns[0]->name() + ", dfwithin) is " + round(pValue_a), "blue"); + printLine(5, "P(df" + columns[1]->name() + ", dfwithin) is " + round(pValue_b), "blue"); +// printLine(2, "P(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); + 
+ statisticValue.append(fValue_a); + statisticValue.append(fValue_b); + statisticValue.append(fValue_interaction); + + pValue.append(pValue_a); + pValue.append(pValue_b); + return; } @@ -825,8 +891,6 @@ // ziBarBar = mean for all zij // ni = number of elements in group i void HypothesisTestPrivate::performLeveneTest(bool categoricalVariable) { - clearTestView(); - if (columns.size() != 2) { printError("Inappropriate number of columns selected"); return; @@ -1045,16 +1109,16 @@ delete[] ziBar; delete[] ni; - pValue = nsl_stats_fdist_p(fValue, static_cast(np-1), df); + pValue.append(nsl_stats_fdist_p(fValue, static_cast(np-1), df)); printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue"); printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue"); printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); printLine(4, i18n("F Value is %1 ", round(fValue)), "green"); - printLine(5, i18n("P Value is %1 ", pValue), "green"); + printLine(5, i18n("P Value is %1 ", pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); - if (pValue <= significanceLevel) { + if (pValue[0] <= significanceLevel) { printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); printLine(8, "Requirement for homogeneity is not met", "red"); } else { @@ -1062,6 +1126,7 @@ printLine(8, "Requirement for homogeneity is met", "green"); } + statisticValue.append(fValue); return; } @@ -1247,28 +1312,28 @@ //TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol; // TODO: check for correctness between: for TestZ with TailTwo -// pValue = 2*gsl_cdf_tdist_P(value, df) v/s -// pValue = gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df); +// pValue.append(2*gsl_cdf_tdist_P(value, df) v/s +// pValue.append(gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df); double 
HypothesisTestPrivate::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) { switch (test) { case HypothesisTest::Test::Type::TTest: { switch (tailType) { case HypothesisTest::Test::Tail::Negative: { - pValue = gsl_cdf_tdist_P(value, df); + pValue.append(gsl_cdf_tdist_P(value, df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Positive: { value *= -1; - pValue = gsl_cdf_tdist_P(value, df); + pValue.append(gsl_cdf_tdist_P(value, df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Two: { - pValue = 2.*gsl_cdf_tdist_P(-fabs(value), df); + pValue.append(2.*gsl_cdf_tdist_P(-fabs(value), df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); @@ -1280,20 +1345,20 @@ case HypothesisTest::Test::Type::ZTest: { switch (tailType) { case HypothesisTest::Test::Tail::Negative: { - pValue = gsl_cdf_gaussian_P(value - mean, sp); + pValue.append(gsl_cdf_gaussian_P(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, 
UTF8_QSTRING("⋖"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Positive: { value *= -1; - pValue = nsl_stats_tdist_p(value - mean, sp); + pValue.append(nsl_stats_tdist_p(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Two: { - pValue = 2.*gsl_cdf_gaussian_P(value - mean, sp); + pValue.append(2.*gsl_cdf_gaussian_P(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); break; @@ -1306,9 +1371,9 @@ break; } - if (pValue > 1) + if (pValue[0] > 1) return 1; - return pValue; + return pValue[0]; } int HypothesisTestPrivate::setSpanValues(HypothesisTestPrivate::Node* root, int& totalLevels) { @@ -1473,7 +1538,9 @@ ".tg {border-collapse:collapse;border: 1px solid black;}" ".tg td{font-family:Arial, sans-serif;font-size:14px;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#fff;}" ".tg th{font-family:Arial, sans-serif;font-size:14px;font-weight:normal;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#f0f0f0;}" - ""; + ""; + + table += "
"; table += " "; int prevLevel = 0; @@ -1501,6 +1568,7 @@ cellEndTag; } table += " "; + table += "
"; return table; } @@ -1527,16 +1595,6 @@ printLine(0, errorMsg, "red"); } -void HypothesisTestPrivate::clearSummaryLayout() { - for (int i = 0; i < 10; i++) - resultLine[i]->clear(); -} - -void HypothesisTestPrivate::clearTestView() { - statsTable = ""; - clearSummaryLayout(); -} - /********************************************************************************** * virtual functions implementations diff --git a/src/backend/hypothesisTest/HypothesisTestPrivate.h b/src/backend/hypothesisTest/HypothesisTestPrivate.h --- a/src/backend/hypothesisTest/HypothesisTestPrivate.h +++ b/src/backend/hypothesisTest/HypothesisTestPrivate.h @@ -90,8 +90,8 @@ double significanceLevel; QString statsTable; HypothesisTest::Test::Tail tailType; - double pValue{0}; - double statisticValue{0}; + QList pValue; + QList statisticValue; QVBoxLayout* summaryLayout{nullptr}; QLabel* resultLine[10]; @@ -118,9 +118,6 @@ void printLine(const int& index, const QString& msg, const QString& color = "black"); void printTooltip(const int& index, const QString& msg); void printError(const QString& errorMsg); - void clearTestView(); - - void clearSummaryLayout(); bool m_dbCreated{false}; }; diff --git a/tests/stats/CMakeLists.txt b/tests/stats/CMakeLists.txt --- a/tests/stats/CMakeLists.txt +++ b/tests/stats/CMakeLists.txt @@ -1 +1,2 @@ -add_subdirectory(ttest) \ No newline at end of file +add_subdirectory(ttest) +add_subdirectory(anova) diff --git a/tests/stats/anova/AnovaTest.h b/tests/stats/anova/AnovaTest.h new file mode 100644 --- /dev/null +++ b/tests/stats/anova/AnovaTest.h @@ -0,0 +1,42 @@ +/*************************************************************************** + File : AnovaTest.h + Project : LabPlot + Description : Tests for data correlation + -------------------------------------------------------------------- + Copyright : (C) 2019 Devanshu Agarwal (agarwaldevanshu8@gmail.com) + ***************************************************************************/ + 
+/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the Free Software * + * Foundation, Inc., 51 Franklin Street, Fifth Floor, * + * Boston, MA 02110-1301 USA * + * * + ***************************************************************************/ +#ifndef ANOVATEST_H +#define ANOVATEST_H + +#include <../../CommonTest.h> + +class AnovaTest : public CommonTest { + Q_OBJECT + +private slots: + void oneWayAnova_data(); + void oneWayAnova(); + + void twoWayAnova_data(); + void twoWayAnova(); +}; +#endif diff --git a/tests/stats/anova/AnovaTest.cpp b/tests/stats/anova/AnovaTest.cpp new file mode 100644 --- /dev/null +++ b/tests/stats/anova/AnovaTest.cpp @@ -0,0 +1,183 @@ +/*************************************************************************** + File : AnovaTest.cpp + Project : LabPlot + Description : Tests for one-way and two-way ANOVA + -------------------------------------------------------------------- + Copyright : (C) 2019 Devanshu Agarwal (agarwaldevanshu8@gmail.com) + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software 
Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the Free Software * + * Foundation, Inc., 51 Franklin Street, Fifth Floor, * + * Boston, MA 02110-1301 USA * + * * + ***************************************************************************/ + +#include "AnovaTest.h" +#include "backend/hypothesisTest/HypothesisTest.h" + +#include "backend/core/AbstractColumn.h" +#include "backend/core/column/Column.h" + +void AnovaTest::oneWayAnova_data() { + QTest::addColumn>("col1Data"); + QTest::addColumn>("col2Data"); + QTest::addColumn("fValue_expected"); + QTest::addColumn("pValue_expected"); + + // First Sample + QVector col1Data = {"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", + "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", + "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", + "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", + "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5"}; + + QVector col2Data = {1, 43, 15, 40, 8, 17, 30, 34, 34, 26, 1, 7, 22, 30, 40, 15, 20, 9, 14, 15, + 6, 30, 15, 30, 12, 17, 21, 23, 20, 27, -19, -18, -8, -6, -6, -9, -17, -12, -11, -6, + 5, 8, 12, 19, 8, 15, 21, 28, 26, 27, -10, 6, 4, 3, 0, 4, 9, -5, 7, 13, + 38, 20, 20, 28, 11, 17, 15, 27, 24, 23, 28, 26, 34, 32, 24, 29, 30, 24, 34, 23, + -5, -12, -15, -4, -2, -6, -2, -7, -10, -15, -13, -16, -23, -22, -9, -18, -17, -15, 
-14, -15}; + double fValue_expected = 33.1289; + double pValue_expected = 0; + + QTest::newRow("socscistatistics") << col1Data << col2Data << fValue_expected << pValue_expected; +} + +void AnovaTest::oneWayAnova() { + QFETCH(QVector, col1Data); + QFETCH(QVector, col2Data); + QFETCH(double, fValue_expected); + QFETCH(double, pValue_expected); + + Column* col1 = new Column("col1", AbstractColumn::Text); + Column* col2 = new Column("col2", AbstractColumn::Numeric); + + col1->replaceTexts(0, col1Data); + col2->replaceValues(0, col2Data); + + QVector cols; + cols << col1 << col2; + + HypothesisTest anovaTest("One Way Anova"); + anovaTest.setColumns(cols); + + HypothesisTest::Test test; + test.type = HypothesisTest::Test::Type::Anova; + test.subtype = HypothesisTest::Test::SubType::OneWay; + test.tail = HypothesisTest::Test::Tail::Two; + + bool categoricalVariable = true; + bool equalVariance = true; + + anovaTest.performTest(test, categoricalVariable, equalVariance); + + double fValue = anovaTest.statisticValue()[0]; + double pValue = anovaTest.pValue()[0]; + + QDEBUG("fValue is " << fValue); + QDEBUG("pValue is: " << pValue); + QDEBUG("fValue_expected is " << fValue_expected); + QDEBUG("pValue_expected is: " << pValue_expected); + + FuzzyCompare(fValue, fValue_expected, 0.1); + FuzzyCompare(pValue, pValue_expected, 0.1); +} + +void AnovaTest::twoWayAnova_data() { + QTest::addColumn>("col1Data"); + QTest::addColumn>("col2Data"); + QTest::addColumn>("col3Data"); + QTest::addColumn("fCol1Value_expected"); + QTest::addColumn("fCol2Value_expected"); + QTest::addColumn("fInteractionValue_expected"); + QTest::addColumn("pCol1Value_expected"); + QTest::addColumn("pCol2Value_expected"); + + // First Sample + // This data set is taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf + QVector col1Data = {"Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Best", "Best", "Best", "Best", "Best", "Best", 
"Best", "Best", "Best", "Best", "Best", "Best"}; + QVector col2Data = {"cold", "cold", "cold", "cold", "warm", "warm", "warm", "warm", "hot", "hot", "hot", "hot", "cold", "cold", "cold", "cold", "warm", "warm", "warm", "warm", "hot", "hot", "hot", "hot"}; + QVector col3Data = {4, 5, 6, 5, 7, 9, 8, 12, 10, 12, 11, 9, 6, 6, 4, 4, 13, 15, 12, 12, 12, 13, 10, 13}; + double fCol1Value_expected = 9.8108; + double fCol2Value_expected = 48.7297; + double fInteractionValue_expected = 3.9730; + double pCol1Value_expected = 0.005758; + double pCol2Value_expected = 5.44e-08; +// double pInteractionValue_expected = 0.037224; + + QTest::newRow("detergent vs temperature") << col1Data << col2Data << col3Data << + fCol1Value_expected << fCol2Value_expected << fInteractionValue_expected << + pCol1Value_expected << pCol2Value_expected; +} + +//TODO: check for pValue. In document probabilty is Pr(>F) +void AnovaTest::twoWayAnova() { + QFETCH(QVector, col1Data); + QFETCH(QVector, col2Data); + QFETCH(QVector, col3Data); + QFETCH(double, fCol1Value_expected); + QFETCH(double, fCol2Value_expected); + QFETCH(double, fInteractionValue_expected); + QFETCH(double, pCol1Value_expected); + QFETCH(double, pCol2Value_expected); + + Column* col1 = new Column("col1", AbstractColumn::Text); + Column* col2 = new Column("col2", AbstractColumn::Text); + Column* col3 = new Column("col3", AbstractColumn::Numeric); + + col1->replaceTexts(0, col1Data); + col2->replaceTexts(0, col2Data); + col3->replaceValues(0, col3Data); + + QVector cols; + cols << col1 << col2 << col3; + + HypothesisTest anovaTest("Two Way Anova"); + anovaTest.setColumns(cols); + + HypothesisTest::Test test; + test.type = HypothesisTest::Test::Type::Anova; + test.subtype = HypothesisTest::Test::SubType::TwoWay; + test.tail = HypothesisTest::Test::Tail::Two; + + anovaTest.performTest(test); + double fCol1Value = anovaTest.statisticValue()[0]; + double fCol2Value = anovaTest.statisticValue()[1]; + double fInteractionValue = 
anovaTest.statisticValue()[2]; + + double pCol1Value = anovaTest.pValue()[0]; + double pCol2Value = anovaTest.pValue()[1]; + + QDEBUG("size of statistic value is " << anovaTest.statisticValue().size()); + QDEBUG("fCol1Value is " << fCol1Value); + QDEBUG("fCol1Value_expected is " << fCol1Value_expected); + QDEBUG("fCol2Value is " << fCol2Value); + QDEBUG("fCol2Value_expected is " << fCol2Value_expected); + QDEBUG("fInteractionValue is " << fInteractionValue); + QDEBUG("fInteractionValue_expected is " << fInteractionValue_expected); + + QDEBUG("pCol1Value is " << pCol1Value); + QDEBUG("pCol1Value_expected is " << pCol1Value_expected); + QDEBUG("pCol2Value is " << pCol2Value); + QDEBUG("pCol2Value_expected is " << pCol2Value_expected); + + FuzzyCompare(fCol1Value, fCol1Value_expected, 0.1); + FuzzyCompare(fCol2Value, fCol2Value_expected, 0.1); + FuzzyCompare(fInteractionValue, fInteractionValue_expected, 0.1); + FuzzyCompare(pCol1Value, pCol1Value_expected, 0.1); + FuzzyCompare(pCol2Value, pCol2Value_expected, 0.1); +} + +QTEST_MAIN(AnovaTest) diff --git a/tests/stats/anova/CMakeLists.txt b/tests/stats/anova/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/tests/stats/anova/CMakeLists.txt @@ -0,0 +1,7 @@ +add_executable (anova AnovaTest.cpp ../../CommonTest.cpp) + +target_link_libraries(anova Qt5::Test) +target_link_libraries(anova KF5::Archive KF5::XmlGui) +target_link_libraries(anova labplot2lib) + +add_test(NAME anova COMMAND anova) diff --git a/tests/stats/ttest/TTestTest.cpp b/tests/stats/ttest/TTestTest.cpp --- a/tests/stats/ttest/TTestTest.cpp +++ b/tests/stats/ttest/TTestTest.cpp @@ -82,8 +82,8 @@ bool equalVariance = true; tTest.performTest(test, categoricalVariable, equalVariance); - double tValue = tTest.statisticValue(); - double pValue = tTest.pValue(); + double tValue = tTest.statisticValue()[0]; + double pValue = tTest.pValue()[0]; qDebug() << "tValue is " << tValue; qDebug() << "pValue is: " << pValue; @@ -135,8 +135,8 @@ test.tail = 
HypothesisTest::Test::Tail::Two; tTest.performTest(test); - double tValue = tTest.statisticValue(); - double pValue = tTest.pValue(); + double tValue = tTest.statisticValue()[0]; + double pValue = tTest.pValue()[0]; qDebug() << "tValue is " << tValue; qDebug() << "pValue is: " << pValue; @@ -185,8 +185,8 @@ test.tail = HypothesisTest::Test::Tail::Two; tTest.performTest(test); - double tValue = tTest.statisticValue(); - double pValue = tTest.pValue(); + double tValue = tTest.statisticValue()[0]; + double pValue = tTest.pValue()[0]; qDebug() << "tValue is " << tValue; qDebug() << "pValue is: " << pValue;