diff --git a/src/backend/hypothesisTest/HypothesisTest.h b/src/backend/hypothesisTest/HypothesisTest.h
--- a/src/backend/hypothesisTest/HypothesisTest.h
+++ b/src/backend/hypothesisTest/HypothesisTest.h
@@ -94,8 +94,8 @@
void performLeveneTest(bool categorical_variable);
- double statisticValue();
- double pValue();
+ QList statisticValue();
+ QList pValue();
QVBoxLayout* summaryLayout();
diff --git a/src/backend/hypothesisTest/HypothesisTest.cpp b/src/backend/hypothesisTest/HypothesisTest.cpp
--- a/src/backend/hypothesisTest/HypothesisTest.cpp
+++ b/src/backend/hypothesisTest/HypothesisTest.cpp
@@ -47,8 +47,6 @@
#include
#include
-#include
-
extern "C" {
#include "backend/nsl/nsl_stats.h"
}
@@ -106,6 +104,12 @@
void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) {
d->tailType = test.tail;
+ d->pValue.clear();
+ d->statisticValue.clear();
+ d->statsTable = "";
+ for (int i = 0; i < 10; i++)
+ d->resultLine[i]->clear();
+
switch (test.subtype) {
case HypothesisTest::Test::SubType::TwoSampleIndependent: {
d->currTestName = "" + i18n("Two Sample Independent Test") + "
";
@@ -141,15 +145,14 @@
void HypothesisTest::performLeveneTest(bool categoricalVariable) {
d->currTestName = "" + i18n("Levene Test for Equality of Variance") + "
";
d->performLeveneTest(categoricalVariable);
-
emit changed();
}
-double HypothesisTest::statisticValue() {
+QList HypothesisTest::statisticValue() {
return d->statisticValue;
}
-double HypothesisTest::pValue() {
+QList HypothesisTest::pValue() {
return d->pValue;
}
@@ -203,8 +206,6 @@
/**************************Two Sample Independent *************************************/
void HypothesisTestPrivate::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) {
- clearTestView();
-
if (columns.size() != 2) {
printError("Inappropriate number of columns selected");
return;
@@ -300,7 +301,7 @@
sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) +
(n[1]-1) * gsl_pow_2(std[1]) ) / df );
- statisticValue = (mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1]));
+ statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1])));
printLine(9, "Assumption: Equal Variance b/w both population means");
} else {
double temp_val;
@@ -309,8 +310,8 @@
(gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1)));
df = qRound(temp_val);
- statisticValue = (mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) +
- (gsl_pow_2(std[1])/n[1])));
+ statisticValue.append((mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) +
+ (gsl_pow_2(std[1])/n[1]))));
printLine(9, "Assumption: UnEqual Variance b/w both population means");
}
@@ -320,8 +321,8 @@
case HypothesisTest::Test::Type::ZTest: {
testName = "Z";
sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df);
- statisticValue = (mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1]));
- pValue = gsl_cdf_gaussian_P(statisticValue, sp);
+ statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1])));
+// pValue.append(gsl_cdf_gaussian_P(statisticValue, sp));
break;
}
case HypothesisTest::Test::Type::Anova:
@@ -330,19 +331,19 @@
}
currTestName = "" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "
";
- pValue = getPValue(test, statisticValue, col1Name, col2Name, (mean[0] - mean[1]), sp, df);
+ pValue.append(getPValue(test, statisticValue[0], col1Name, col2Name, (mean[0] - mean[1]), sp, df));
printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue");
- printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green");
+ printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green");
printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName));
- printLine(5, i18n("P Value is %1 ", pValue), "green");
+ printLine(5, i18n("P Value is %1 ", pValue[0]), "green");
printLine(6, i18n("Degree of Freedom is %1", df), "green");
printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate"));
- if (pValue <= significanceLevel)
+ if (pValue[0] <= significanceLevel)
printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(significanceLevel)));
else
printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
@@ -352,8 +353,6 @@
/********************************Two Sample Paired ***************************************/
void HypothesisTestPrivate::performTwoSamplePairedTest(HypothesisTest::Test::Type test) {
- clearTestView();
-
if (columns.size() != 2) {
printError("Inappropriate number of columns selected");
@@ -403,7 +402,7 @@
switch (test) {
case HypothesisTest::Test::Type::TTest: {
- statisticValue = mean / (std / qSqrt(n));
+ statisticValue.append(mean / (std / qSqrt(n))); // the list is cleared in performTest(), so index 0 does not exist yet: append instead of assigning
df = n - 1;
testName = "T";
printLine(6, i18n("Degree of Freedom is %1
name(), i18n("%1", populationMean), mean, std, df);
+ pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1", populationMean), mean, std, df));
currTestName = "" + i18n("One Sample %1 Test for %2 vs %3", testName, columns[0]->name(), columns[1]->name()) + "
";
printLine(2, i18n("Significance level is %1 ", round(significanceLevel)), "blue");
- printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green");
- printLine(5, i18n("P Value is %1 ", pValue), "green");
+ printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green");
+ printLine(5, i18n("P Value is %1 ", pValue[0]), "green");
- if (pValue <= significanceLevel)
+ if (pValue[0] <= significanceLevel)
printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));
else
printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
@@ -440,8 +439,6 @@
/******************************** One Sample ***************************************/
void HypothesisTestPrivate::performOneSampleTest(HypothesisTest::Test::Type test) {
- clearTestView();
-
if (columns.size() != 1) {
printError("Inappropriate number of columns selected");
@@ -488,7 +485,7 @@
switch (test) {
case HypothesisTest::Test::Type::TTest: {
testName = "T";
- statisticValue = (mean - populationMean) / (std / qSqrt(n));
+ statisticValue.append((mean - populationMean) / (std / qSqrt(n)));
df = n - 1;
printLine(6, i18n("Degree of Freedom is %1", df), "blue");
break;
@@ -496,7 +493,7 @@
case HypothesisTest::Test::Type::ZTest: {
testName = "Z";
df = 0;
- statisticValue = (mean - populationMean) / (std / qSqrt(n));
+ statisticValue.append((mean - populationMean) / (std / qSqrt(n)));
break;
}
case HypothesisTest::Test::Type::Anova:
@@ -504,14 +501,14 @@
break;
}
- pValue = getPValue(test, statisticValue, columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df);
+ pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df));
currTestName = "" + i18n("One Sample %1 Test for %2", testName, columns[0]->name()) + "
";
printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue");
- printLine(4, i18n("%1 Value is %2", testName, round(statisticValue)), "green");
- printLine(5, i18n("P Value is %1", pValue), "green");
+ printLine(4, i18n("%1 Value is %2", testName, round(statisticValue[0])), "green");
+ printLine(5, i18n("P Value is %1", pValue[0]), "green");
- if (pValue <= significanceLevel)
+ if (pValue[0] <= significanceLevel)
printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));
else
printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
@@ -526,8 +523,7 @@
// w stands for within groups
// np is number of partition i.e., number of classes
void HypothesisTestPrivate::performOneWayAnova() {
- clearTestView();
- int np, totalRows;
+ int np, totalRows;
countPartitions(columns[0], np, totalRows);
int* ni = new int[np];
@@ -551,8 +547,6 @@
double sW = 0; // sum of squares of (value - mean of group) within the groups
int fW = 0; // degree of freedom within the group
double msW = 0; // mean sum of squares within the groups
- double fValue = 0;
-
// now finding mean of each group;
@@ -573,10 +567,10 @@
msB = sB / fB;
msW = sW / fW;
- fValue = msB / msW;
+ statisticValue.append(msB / msW);
- pValue = nsl_stats_fdist_p(fValue, static_cast(np-1), fW);
+ pValue.append(nsl_stats_fdist_p(statisticValue[0], static_cast(np-1), fW));
QMapIterator i(classnameToIndex);
while (i.hasNext()) {
@@ -643,10 +637,10 @@
delete[] std;
delete[] colNames;
- printLine(1, i18n("F Value is %1", round(fValue)), "green");
- printLine(2, i18n("P Value is %1 ", pValue), "green");
+ printLine(1, i18n("F Value is %1", round(statisticValue[0])), "green");
+ printLine(2, i18n("P Value is %1 ", pValue[0]), "green");
- if (pValue <= significanceLevel)
+ if (pValue[0] <= significanceLevel)
printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));
else
printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true"));
@@ -659,9 +653,9 @@
// all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf
//TODO: suppress warning of variable length array are a C99 feature.
-//TODO: changed int mean to double mean;
+//TODO: add assumptions verification option
+//TODO: add tail option (if needed)
void HypothesisTestPrivate::performTwoWayAnova() {
- clearTestView();
int np_a, totalRows_a;
int np_b, totalRows_b;
countPartitions(columns[0], np_a, totalRows_a);
@@ -720,9 +714,9 @@
groupMean[i][j] /= replicates[i][j];
}
- for (int i = 0; i < np_a; i++)
- for (int j = 0; j < np_b; j++)
- groupMean[i][j] = int(groupMean[i][j]);
+// for (int i = 0; i < np_a; i++)
+// for (int j = 0; j < np_b; j++)
+// groupMean[i][j] = int(groupMean[i][j]);
double ss_within = 0;
for (int i = 0; i < totalRows_a; i++) {
@@ -749,15 +743,32 @@
for (int i = 0; i < np_a; i++)
mean += mean_a[i] / np_a;
- QDEBUG("ss_within is " << ss_within);
- QDEBUG("df_within is " << df_within);
- QDEBUG("ms_within is " << ms_within);
+ double ss_a = 0;
for (int i = 0; i < np_a; i++)
- QDEBUG("mean_a is " << mean_a[i]);
+ ss_a += gsl_pow_2(mean_a[i] - mean);
+ ss_a *= replicate * np_b;
+
+ int df_a = np_a - 1;
+ double ms_a = ss_a / df_a;
+
+ double ss_b = 0;
for (int i = 0; i < np_b; i++)
- QDEBUG("mean_b is " << mean_b[i]);
+ ss_b += gsl_pow_2(mean_b[i] - mean);
+ ss_b *= replicate * np_a;
+
+ int df_b = np_b - 1;
+ double ms_b = ss_b / df_b;
+
+ double ss_interaction = 0;
+
+ for (int i = 0; i < np_a; i++)
+ for (int j = 0; j < np_b; j++)
+ ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean);
+ ss_interaction *= replicate;
+ int df_interaction = (np_a - 1) * (np_b - 1);
+ double ms_interaction = ss_interaction / df_interaction;
QString partitionNames_a[np_a];
QString partitionNames_b[np_b];
@@ -791,24 +802,79 @@
for (int i = 0; i < np_a; i++) {
rowMajor.append(new Cell(partitionNames_a[i], level, true));
for (int j = 0; j < np_b; j++) {
- rowMajor.append(new Cell(groupMean[i][j], level));
+ rowMajor.append(new Cell(round(groupMean[i][j]), level));
rowMajor.append(new Cell(replicates[i][j], level));
}
- rowMajor.append(new Cell(mean_a[i], level));
+ rowMajor.append(new Cell(round(mean_a[i]), level));
level++;
}
rowMajor.append(new Cell("Mean", level, true));
for (int i = 0; i < np_b; i++)
- rowMajor.append(new Cell(mean_b[i], level, false, 1, 2));
- rowMajor.append(new Cell(mean, level));
+ rowMajor.append(new Cell(round(mean_b[i]), level, false, 1, 2));
+ rowMajor.append(new Cell(round(mean), level));
statsTable = "" + i18n("Contingency Table") + "
";
statsTable += getHtmlTable3(rowMajor);
-// QDEBUG("");
-// QDEBUG("");
-// QDEBUG(statsTable);
+ statsTable += "";
+ statsTable += "" + i18n("results table") + "
";
+
+ rowMajor.clear();
+ level = 0;
+ rowMajor.append(new Cell("", level, true));
+ rowMajor.append(new Cell("SS", level, true));
+ rowMajor.append(new Cell("DF", level, true));
+ rowMajor.append(new Cell("MS", level, true));
+
+ level++;
+ rowMajor.append(new Cell(columns[0]->name(), level, true));
+ rowMajor.append(new Cell(round(ss_a), level));
+ rowMajor.append(new Cell(df_a, level));
+ rowMajor.append(new Cell(round(ms_a), level));
+
+ level++;
+ rowMajor.append(new Cell(columns[1]->name(), level, true));
+ rowMajor.append(new Cell(round(ss_b), level));
+ rowMajor.append(new Cell(df_b, level));
+ rowMajor.append(new Cell(round(ms_b), level));
+
+ level++;
+ rowMajor.append(new Cell("Interaction", level, true));
+ rowMajor.append(new Cell(round(ss_interaction), level));
+ rowMajor.append(new Cell(df_interaction, level));
+ rowMajor.append(new Cell(round(ms_interaction), level));
+
+ level++;
+ rowMajor.append(new Cell("Within", level, true));
+ rowMajor.append(new Cell(round(ss_within), level));
+ rowMajor.append(new Cell(df_within, level));
+ rowMajor.append(new Cell(round(ms_within), level));
+
+ statsTable += getHtmlTable3(rowMajor);
+
+ double fValue_a = ms_a / ms_within;
+ double fValue_b = ms_b / ms_within;
+ double fValue_interaction = ms_interaction / ms_within;
+
+ double pValue_a = nsl_stats_fdist_p(fValue_a, static_cast(np_a - 1), df_within); // F = ms_a / ms_within, so the denominator df is df_within
+ double pValue_b = nsl_stats_fdist_p(fValue_b, static_cast(np_b - 1), df_within); // F = ms_b / ms_within, so the denominator df is df_within
+
+ printLine(0, "F(df" + columns[0]->name() + ", dfwithin) is " + round(fValue_a), "blue");
+ printLine(1, "F(df" + columns[1]->name() + ", dfwithin) is " + round(fValue_b), "blue");
+ printLine(2, "F(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue");
+
+ printLine(4, "P(df" + columns[0]->name() + ", dfwithin) is " + round(pValue_a), "blue");
+ printLine(5, "P(df" + columns[1]->name() + ", dfwithin) is " + round(pValue_b), "blue");
+// printLine(2, "P(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue");
+
+ statisticValue.append(fValue_a);
+ statisticValue.append(fValue_b);
+ statisticValue.append(fValue_interaction);
+
+ pValue.append(pValue_a);
+ pValue.append(pValue_b);
+
return;
}
@@ -825,8 +891,6 @@
// ziBarBar = mean for all zij
// ni = number of elements in group i
void HypothesisTestPrivate::performLeveneTest(bool categoricalVariable) {
- clearTestView();
-
if (columns.size() != 2) {
printError("Inappropriate number of columns selected");
return;
@@ -1045,16 +1109,16 @@
delete[] ziBar;
delete[] ni;
- pValue = nsl_stats_fdist_p(fValue, static_cast(np-1), df);
+ pValue.append(nsl_stats_fdist_p(fValue, static_cast(np-1), df));
printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue");
printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue");
printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue");
printLine(4, i18n("F Value is %1 ", round(fValue)), "green");
- printLine(5, i18n("P Value is %1 ", pValue), "green");
+ printLine(5, i18n("P Value is %1 ", pValue[0]), "green");
printLine(6, i18n("Degree of Freedom is %1", df), "green");
- if (pValue <= significanceLevel) {
+ if (pValue[0] <= significanceLevel) {
printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));
printLine(8, "Requirement for homogeneity is not met", "red");
} else {
@@ -1062,6 +1126,7 @@
printLine(8, "Requirement for homogeneity is met", "green");
}
+ statisticValue.append(fValue);
return;
}
@@ -1247,28 +1312,33 @@
//TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol;
// TODO: check for correctness between: for TestZ with TailTwo
-// pValue = 2*gsl_cdf_tdist_P(value, df) v/s
-// pValue = gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df);
+// p = 2*gsl_cdf_tdist_P(value, df) v/s
+// p = gsl_cdf_tdist_P(value, df) + gsl_cdf_tdist_P(-value, df);
+// Computes the p value of the T or Z statistic \c value for the current tailType and prints
+// the null/alternate hypothesis lines (result lines 0 and 1). \c value is negated in place for
+// the positive tail. The result is capped at 1 and only returned: the caller stores it in the
+// pValue list, so it must not be appended here as well. Other test types yield 0.
double HypothesisTestPrivate::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) {
+ double p = 0;
switch (test) {
case HypothesisTest::Test::Type::TTest: {
switch (tailType) {
case HypothesisTest::Test::Tail::Negative: {
- pValue = gsl_cdf_tdist_P(value, df);
+ p = gsl_cdf_tdist_P(value, df);
printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue");
printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue");
break;
}
case HypothesisTest::Test::Tail::Positive: {
value *= -1;
- pValue = gsl_cdf_tdist_P(value, df);
+ p = gsl_cdf_tdist_P(value, df);
printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue");
printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue");
break;
}
case HypothesisTest::Test::Tail::Two: {
- pValue = 2.*gsl_cdf_tdist_P(-fabs(value), df);
+ p = 2.*gsl_cdf_tdist_P(-fabs(value), df);
printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue");
printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue");
@@ -1280,20 +1350,20 @@
case HypothesisTest::Test::Type::ZTest: {
switch (tailType) {
case HypothesisTest::Test::Tail::Negative: {
- pValue = gsl_cdf_gaussian_P(value - mean, sp);
+ p = gsl_cdf_gaussian_P(value - mean, sp);
printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue");
printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue");
break;
}
case HypothesisTest::Test::Tail::Positive: {
value *= -1;
- pValue = nsl_stats_tdist_p(value - mean, sp);
+ p = nsl_stats_tdist_p(value - mean, sp);
printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue");
printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue");
break;
}
case HypothesisTest::Test::Tail::Two: {
- pValue = 2.*gsl_cdf_gaussian_P(value - mean, sp);
+ p = 2.*gsl_cdf_gaussian_P(value - mean, sp);
printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue");
printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue");
break;
@@ -1306,9 +1376,9 @@
break;
}
- if (pValue > 1)
+ if (p > 1)
return 1;
- return pValue;
+ return p;
}
int HypothesisTestPrivate::setSpanValues(HypothesisTestPrivate::Node* root, int& totalLevels) {
@@ -1473,7 +1538,9 @@
".tg {border-collapse:collapse;border: 1px solid black;}"
".tg td{font-family:Arial, sans-serif;font-size:14px;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#fff;}"
".tg th{font-family:Arial, sans-serif;font-size:14px;font-weight:normal;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#f0f0f0;}"
- "";
+ "";
+
+ table += "";
table += " ";
int prevLevel = 0;
@@ -1501,6 +1568,7 @@
cellEndTag;
}
table += "
";
+ table += "
";
return table;
}
@@ -1527,16 +1595,6 @@
printLine(0, errorMsg, "red");
}
-void HypothesisTestPrivate::clearSummaryLayout() {
- for (int i = 0; i < 10; i++)
- resultLine[i]->clear();
-}
-
-void HypothesisTestPrivate::clearTestView() {
- statsTable = "";
- clearSummaryLayout();
-}
-
/**********************************************************************************
* virtual functions implementations
diff --git a/src/backend/hypothesisTest/HypothesisTestPrivate.h b/src/backend/hypothesisTest/HypothesisTestPrivate.h
--- a/src/backend/hypothesisTest/HypothesisTestPrivate.h
+++ b/src/backend/hypothesisTest/HypothesisTestPrivate.h
@@ -90,8 +90,8 @@
double significanceLevel;
QString statsTable;
HypothesisTest::Test::Tail tailType;
- double pValue{0};
- double statisticValue{0};
+ QList pValue;
+ QList statisticValue;
QVBoxLayout* summaryLayout{nullptr};
QLabel* resultLine[10];
@@ -118,9 +118,6 @@
void printLine(const int& index, const QString& msg, const QString& color = "black");
void printTooltip(const int& index, const QString& msg);
void printError(const QString& errorMsg);
- void clearTestView();
-
- void clearSummaryLayout();
bool m_dbCreated{false};
};
diff --git a/tests/stats/CMakeLists.txt b/tests/stats/CMakeLists.txt
--- a/tests/stats/CMakeLists.txt
+++ b/tests/stats/CMakeLists.txt
@@ -1 +1,2 @@
-add_subdirectory(ttest)
\ No newline at end of file
+add_subdirectory(ttest)
+add_subdirectory(anova)
diff --git a/tests/stats/anova/AnovaTest.h b/tests/stats/anova/AnovaTest.h
new file mode 100644
--- /dev/null
+++ b/tests/stats/anova/AnovaTest.h
@@ -0,0 +1,47 @@
+/***************************************************************************
+ File : AnovaTest.h
+ Project : LabPlot
+ Description : Tests for one-way and two-way ANOVA
+ --------------------------------------------------------------------
+ Copyright : (C) 2019 Devanshu Agarwal (agarwaldevanshu8@gmail.com)
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the Free Software *
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
+ * Boston, MA 02110-1301 USA *
+ * *
+ ***************************************************************************/
+#ifndef ANOVATEST_H
+#define ANOVATEST_H
+
+#include <../../CommonTest.h>
+
+// Qt Test fixture for the ANOVA hypothesis tests.
+// Each "<name>_data()" slot registers the input columns and the expected values
+// that the matching "<name>()" slot fetches and checks.
+class AnovaTest : public CommonTest {
+ Q_OBJECT
+
+private slots:
+ // one-way ANOVA: checks the F value and the p value
+ void oneWayAnova_data();
+ void oneWayAnova();
+
+ // two-way ANOVA: checks the F values of both factors and of their interaction, and the p values of both factors
+ void twoWayAnova_data();
+ void twoWayAnova();
+};
+#endif
diff --git a/tests/stats/anova/AnovaTest.cpp b/tests/stats/anova/AnovaTest.cpp
new file mode 100644
--- /dev/null
+++ b/tests/stats/anova/AnovaTest.cpp
@@ -0,0 +1,183 @@
+/***************************************************************************
+ File : AnovaTest.cpp
+ Project : LabPlot
+ Description : Tests for one-way and two-way ANOVA
+ --------------------------------------------------------------------
+ Copyright : (C) 2019 Devanshu Agarwal (agarwaldevanshu8@gmail.com)
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the Free Software *
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
+ * Boston, MA 02110-1301 USA *
+ * *
+ ***************************************************************************/
+
+#include "AnovaTest.h"
+#include "backend/hypothesisTest/HypothesisTest.h"
+
+#include "backend/core/AbstractColumn.h"
+#include "backend/core/column/Column.h"
+
+void AnovaTest::oneWayAnova_data() {
+ QTest::addColumn>("col1Data");
+ QTest::addColumn>("col2Data");
+ QTest::addColumn("fValue_expected");
+ QTest::addColumn("pValue_expected");
+
+ // First Sample
+ QVector col1Data = {"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
+ "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
+ "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3",
+ "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4",
+ "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5"};
+
+ QVector col2Data = {1, 43, 15, 40, 8, 17, 30, 34, 34, 26, 1, 7, 22, 30, 40, 15, 20, 9, 14, 15,
+ 6, 30, 15, 30, 12, 17, 21, 23, 20, 27, -19, -18, -8, -6, -6, -9, -17, -12, -11, -6,
+ 5, 8, 12, 19, 8, 15, 21, 28, 26, 27, -10, 6, 4, 3, 0, 4, 9, -5, 7, 13,
+ 38, 20, 20, 28, 11, 17, 15, 27, 24, 23, 28, 26, 34, 32, 24, 29, 30, 24, 34, 23,
+ -5, -12, -15, -4, -2, -6, -2, -7, -10, -15, -13, -16, -23, -22, -9, -18, -17, -15, -14, -15};
+ double fValue_expected = 33.1289;
+ double pValue_expected = 0;
+
+ QTest::newRow("socscistatistics") << col1Data << col2Data << fValue_expected << pValue_expected;
+}
+
+void AnovaTest::oneWayAnova() {
+ QFETCH(QVector, col1Data);
+ QFETCH(QVector, col2Data);
+ QFETCH(double, fValue_expected);
+ QFETCH(double, pValue_expected);
+
+ Column* col1 = new Column("col1", AbstractColumn::Text);
+ Column* col2 = new Column("col2", AbstractColumn::Numeric);
+
+ col1->replaceTexts(0, col1Data);
+ col2->replaceValues(0, col2Data);
+
+ QVector cols;
+ cols << col1 << col2;
+
+ HypothesisTest anovaTest("One Way Anova");
+ anovaTest.setColumns(cols);
+
+ HypothesisTest::Test test;
+ test.type = HypothesisTest::Test::Type::Anova;
+ test.subtype = HypothesisTest::Test::SubType::OneWay;
+ test.tail = HypothesisTest::Test::Tail::Two;
+
+ bool categoricalVariable = true;
+ bool equalVariance = true;
+
+ anovaTest.performTest(test, categoricalVariable, equalVariance);
+
+ double fValue = anovaTest.statisticValue()[0];
+ double pValue = anovaTest.pValue()[0];
+
+ QDEBUG("fValue is " << fValue);
+ QDEBUG("pValue is: " << pValue);
+ QDEBUG("fValue_expected is " << fValue_expected);
+ QDEBUG("pValue_expected is: " << pValue_expected);
+
+ FuzzyCompare(fValue, fValue_expected, 0.1);
+ FuzzyCompare(pValue, pValue_expected, 0.1);
+}
+
+void AnovaTest::twoWayAnova_data() {
+ QTest::addColumn>("col1Data");
+ QTest::addColumn>("col2Data");
+ QTest::addColumn>("col3Data");
+ QTest::addColumn("fCol1Value_expected");
+ QTest::addColumn("fCol2Value_expected");
+ QTest::addColumn("fInteractionValue_expected");
+ QTest::addColumn("pCol1Value_expected");
+ QTest::addColumn("pCol2Value_expected");
+
+ // First Sample
+ // This data set is taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf
+ QVector col1Data = {"Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best"};
+ QVector col2Data = {"cold", "cold", "cold", "cold", "warm", "warm", "warm", "warm", "hot", "hot", "hot", "hot", "cold", "cold", "cold", "cold", "warm", "warm", "warm", "warm", "hot", "hot", "hot", "hot"};
+ QVector col3Data = {4, 5, 6, 5, 7, 9, 8, 12, 10, 12, 11, 9, 6, 6, 4, 4, 13, 15, 12, 12, 12, 13, 10, 13};
+ double fCol1Value_expected = 9.8108;
+ double fCol2Value_expected = 48.7297;
+ double fInteractionValue_expected = 3.9730;
+ double pCol1Value_expected = 0.005758;
+ double pCol2Value_expected = 5.44e-08;
+// double pInteractionValue_expected = 0.037224;
+
+ QTest::newRow("detergent vs temperature") << col1Data << col2Data << col3Data <<
+ fCol1Value_expected << fCol2Value_expected << fInteractionValue_expected <<
+ pCol1Value_expected << pCol2Value_expected;
+}
+
+//TODO: check the pValue. In the reference document the probability is given as Pr(>F)
+void AnovaTest::twoWayAnova() {
+ QFETCH(QVector, col1Data);
+ QFETCH(QVector, col2Data);
+ QFETCH(QVector, col3Data);
+ QFETCH(double, fCol1Value_expected);
+ QFETCH(double, fCol2Value_expected);
+ QFETCH(double, fInteractionValue_expected);
+ QFETCH(double, pCol1Value_expected);
+ QFETCH(double, pCol2Value_expected);
+
+ Column* col1 = new Column("col1", AbstractColumn::Text);
+ Column* col2 = new Column("col2", AbstractColumn::Text);
+ Column* col3 = new Column("col3", AbstractColumn::Numeric);
+
+ col1->replaceTexts(0, col1Data);
+ col2->replaceTexts(0, col2Data);
+ col3->replaceValues(0, col3Data);
+
+ QVector cols;
+ cols << col1 << col2 << col3;
+
+ HypothesisTest anovaTest("Two Way Anova");
+ anovaTest.setColumns(cols);
+
+ HypothesisTest::Test test;
+ test.type = HypothesisTest::Test::Type::Anova;
+ test.subtype = HypothesisTest::Test::SubType::TwoWay;
+ test.tail = HypothesisTest::Test::Tail::Two;
+
+ anovaTest.performTest(test);
+ double fCol1Value = anovaTest.statisticValue()[0];
+ double fCol2Value = anovaTest.statisticValue()[1];
+ double fInteractionValue = anovaTest.statisticValue()[2];
+
+ double pCol1Value = anovaTest.pValue()[0];
+ double pCol2Value = anovaTest.pValue()[1];
+
+ QDEBUG("size of statistic value is " << anovaTest.statisticValue().size());
+ QDEBUG("fCol1Value is " << fCol1Value);
+ QDEBUG("fCol1Value_expected is " << fCol1Value_expected);
+ QDEBUG("fCol2Value is " << fCol2Value);
+ QDEBUG("fCol2Value_expected is " << fCol2Value_expected);
+ QDEBUG("fInteractionValue is " << fInteractionValue);
+ QDEBUG("fInteractionValue_expected is " << fInteractionValue_expected);
+
+ QDEBUG("pCol1Value is " << pCol1Value);
+ QDEBUG("pCol1Value_expected is " << pCol1Value_expected);
+ QDEBUG("pCol2Value is " << pCol2Value);
+ QDEBUG("pCol2Value_expected is " << pCol2Value_expected);
+
+ FuzzyCompare(fCol1Value, fCol1Value_expected, 0.1);
+ FuzzyCompare(fCol2Value, fCol2Value_expected, 0.1);
+ FuzzyCompare(fInteractionValue, fInteractionValue_expected, 0.1);
+ FuzzyCompare(pCol1Value, pCol1Value_expected, 0.1);
+ FuzzyCompare(pCol2Value, pCol2Value_expected, 0.1);
+}
+
+QTEST_MAIN(AnovaTest)
diff --git a/tests/stats/anova/CMakeLists.txt b/tests/stats/anova/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/tests/stats/anova/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_executable (anova AnovaTest.cpp ../../CommonTest.cpp)
+
+target_link_libraries(anova Qt5::Test)
+target_link_libraries(anova KF5::Archive KF5::XmlGui)
+target_link_libraries(anova labplot2lib)
+
+add_test(NAME anova COMMAND anova)
diff --git a/tests/stats/ttest/TTestTest.cpp b/tests/stats/ttest/TTestTest.cpp
--- a/tests/stats/ttest/TTestTest.cpp
+++ b/tests/stats/ttest/TTestTest.cpp
@@ -82,8 +82,8 @@
bool equalVariance = true;
tTest.performTest(test, categoricalVariable, equalVariance);
- double tValue = tTest.statisticValue();
- double pValue = tTest.pValue();
+ double tValue = tTest.statisticValue()[0];
+ double pValue = tTest.pValue()[0];
qDebug() << "tValue is " << tValue;
qDebug() << "pValue is: " << pValue;
@@ -135,8 +135,8 @@
test.tail = HypothesisTest::Test::Tail::Two;
tTest.performTest(test);
- double tValue = tTest.statisticValue();
- double pValue = tTest.pValue();
+ double tValue = tTest.statisticValue()[0];
+ double pValue = tTest.pValue()[0];
qDebug() << "tValue is " << tValue;
qDebug() << "pValue is: " << pValue;
@@ -185,8 +185,8 @@
test.tail = HypothesisTest::Test::Tail::Two;
tTest.performTest(test);
- double tValue = tTest.statisticValue();
- double pValue = tTest.pValue();
+ double tValue = tTest.statisticValue()[0];
+ double pValue = tTest.pValue()[0];
qDebug() << "tValue is " << tValue;
qDebug() << "pValue is: " << pValue;