diff --git a/src/backend/hypothesisTest/HypothesisTest.cpp b/src/backend/hypothesisTest/HypothesisTest.cpp index 35eb3c38a..811aabd0e 100644 --- a/src/backend/hypothesisTest/HypothesisTest.cpp +++ b/src/backend/hypothesisTest/HypothesisTest.cpp @@ -1,1604 +1,1662 @@ /*************************************************************************** File : HypothesisTest.cpp Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "HypothesisTest.h" #include "HypothesisTestPrivate.h" #include "kdefrontend/hypothesisTest/HypothesisTestView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" #include #include #include #include #include #include #include #include #include #include #include -#include - extern "C" { #include "backend/nsl/nsl_stats.h" } HypothesisTest::HypothesisTest(const QString &name) : AbstractPart(name), d(new HypothesisTestPrivate(this)) { } HypothesisTest::~HypothesisTest() { delete d; } void HypothesisTest::setDataSourceType(DataSourceType type) { if (type != d->dataSourceType) d->dataSourceType = type; } HypothesisTest::DataSourceType HypothesisTest::dataSourceType() const { return d->dataSourceType; } void HypothesisTest::setDataSourceSpreadsheet(Spreadsheet* spreadsheet) { if (spreadsheet != d->dataSourceSpreadsheet) d->setDataSourceSpreadsheet(spreadsheet); } void HypothesisTest::setColumns(const QVector& cols) { d->columns = cols; } void HypothesisTest::setColumns(QStringList cols) { return d->setColumns(cols); } QStringList HypothesisTest::allColumns() { return d->allColumns; } void HypothesisTest::setPopulationMean(QVariant populationMean) { d->populationMean = populationMean.toDouble(); } void HypothesisTest::setSignificanceLevel(QVariant alpha) { d->significanceLevel = alpha.toDouble(); } QString HypothesisTest::testName() { return d->currTestName; } QString HypothesisTest::statsTable() { return d->statsTable; } void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) { d->tailType = test.tail; + d->pValue.clear(); + d->statisticValue.clear(); + d->statsTable = ""; + for (int i = 0; i < 10; i++) + d->resultLine[i]->clear(); + switch (test.subtype) { case HypothesisTest::Test::SubType::TwoSampleIndependent: { d->currTestName = "

" + i18n("Two Sample Independent Test") + "

"; d->performTwoSampleIndependentTest(test.type, categoricalVariable, equalVariance); break; } case HypothesisTest::Test::SubType::TwoSamplePaired: d->currTestName = "

" + i18n("Two Sample Paired Test") + "

"; d->performTwoSamplePairedTest(test.type); break; case HypothesisTest::Test::SubType::OneSample: { d->currTestName = "

" + i18n("One Sample Test") + "

"; d->performOneSampleTest(test.type); break; } case HypothesisTest::Test::SubType::OneWay: { d->currTestName = "

" + i18n("One Way Anova") + "

"; d->performOneWayAnova(); break; } case HypothesisTest::Test::SubType::TwoWay: { d->currTestName = "

" + i18n("Two Way Anova") + "

"; d->performTwoWayAnova(); break; } case HypothesisTest::Test::SubType::NoneSubType: break; } emit changed(); } void HypothesisTest::performLeveneTest(bool categoricalVariable) { d->currTestName = "

" + i18n("Levene Test for Equality of Variance") + "

"; d->performLeveneTest(categoricalVariable); - emit changed(); } -double HypothesisTest::statisticValue() { +QList HypothesisTest::statisticValue() { return d->statisticValue; } -double HypothesisTest::pValue() { +QList HypothesisTest::pValue() { return d->pValue; } QVBoxLayout* HypothesisTest::summaryLayout() { return d->summaryLayout; } /****************************************************************************** * Private Implementations * ****************************************************************************/ //TODO: backend of z test; //TODO: add tooltip to tables. (currently it is not possible to use with QTextDocument); HypothesisTestPrivate::HypothesisTestPrivate(HypothesisTest* owner) : q(owner), summaryLayout(new QVBoxLayout()) { for (int i = 0; i < 10; i++) { resultLine[i] = new QLabel(); summaryLayout->addWidget(resultLine[i]); } } HypothesisTestPrivate::~HypothesisTestPrivate() { } void HypothesisTestPrivate::setDataSourceSpreadsheet(Spreadsheet* spreadsheet) { dataSourceSpreadsheet = spreadsheet; //setting rows and columns count; // rowCount = dataSourceSpreadsheet->rowCount(); // columnCount = dataSourceSpreadsheet->columnCount(); for (auto* col : dataSourceSpreadsheet->children()) allColumns << col->name(); } void HypothesisTestPrivate::setColumns(QStringList cols) { columns.clear(); Column* column = new Column("column"); for (QString col : cols) { if (!cols.isEmpty()) { column = dataSourceSpreadsheet->column(col); columns.append(column); } } delete[] column; } /**************************Two Sample Independent *************************************/ void HypothesisTestPrivate::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) { - clearTestView(); - if (columns.size() != 2) { printError("Inappropriate number of columns selected"); return; } int n[2]; double sum[2], mean[2], std[2]; QString col1Name = columns[0]->name(); QString col2Name = columns[1]->name(); if (!categoricalVariable && isNumericOrInteger(columns[0])) { for (int i = 0; i < 2; i++) { findStats(columns[i], n[i], sum[i], mean[i], std[i]); if (n[i] == 0) { printError("Atleast two values should be there in every column"); return; } if (std[i] == 0) { printError(i18n("Standard Deviation of atleast one column is equal to 0: last column is: %1", columns[i]->name())); return; } } } else { QMap colName; QString baseColName; int np; int totalRows; countPartitions(columns[0], np, totalRows); if (np != 2) { printError( i18n("Number of Categorical Variable in Column %1 is not equal to 2", columns[0]->name())); return; } if (isNumericOrInteger(columns[0])) baseColName = columns[0]->name(); ErrorType errorCode = findStatsCategorical(columns[0], columns[1], n, sum, mean, std, colName, np, totalRows); switch (errorCode) { case ErrorUnqualSize: { printError( i18n("Unequal size between Column %1 and Column %2", columns[0]->name(), columns[1]->name())); return; } case ErrorEmptyColumn: { printError("At least one of selected column is empty"); return; } case NoError: break; } QMapIterator i(colName); while (i.hasNext()) { i.next(); if (i.value() == 1) col1Name = baseColName + " " + i.key(); else col2Name = baseColName + " " + i.key(); } } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", col1Name, n[0], sum[0], mean[0], std[0], col2Name, n[1], sum[1], mean[1], std[1] }; statsTable = getHtmlTable(3, 5, rowMajor); for (int i = 0; i < 2; i++) { if (n[i] == 0) { printError("Atleast two values should be there in every column"); return; } if (std[i] == 0) { printError( i18n("Standard Deviation of atleast one column is equal to 0: last column is: %1", columns[i]->name())); return; } } QString testName; int df = 0; double sp = 0; switch (test) { case HypothesisTest::Test::Type::TTest: { testName = "T"; if (equalVariance) { df = n[0] + n[1] - 2; sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1]) ) / df ); - statisticValue = (mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1])); + statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1]))); printLine(9, "Assumption: Equal Variance b/w both population means"); } else { double temp_val; temp_val = gsl_pow_2( gsl_pow_2(std[0]) / n[0] + gsl_pow_2(std[1]) / n[1]); temp_val = temp_val / ( (gsl_pow_2( (gsl_pow_2(std[0]) / n[0]) ) / (n[0]-1)) + (gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1))); df = qRound(temp_val); - statisticValue = (mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) + - (gsl_pow_2(std[1])/n[1]))); + statisticValue.append((mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) + + (gsl_pow_2(std[1])/n[1])))); printLine(9, "Assumption: UnEqual Variance b/w both population means"); } printLine(8, "Assumption: Both Populations approximately follow normal distribution"); break; } case HypothesisTest::Test::Type::ZTest: { testName = "Z"; sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df); - statisticValue = (mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1])); - pValue = gsl_cdf_gaussian_P(statisticValue, sp); + statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1]))); +// pValue.append(gsl_cdf_gaussian_P(statisticValue, sp)); break; } case HypothesisTest::Test::Type::Anova: case HypothesisTest::Test::Type::NoneType: break; } currTestName = "

" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "

"; - pValue = getPValue(test, statisticValue, col1Name, col2Name, (mean[0] - mean[1]), sp, df); + pValue.append(getPValue(test, statisticValue[0], col1Name, col2Name, (mean[0] - mean[1]), sp, df)); printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); - printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green"); + printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green"); printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName)); - printLine(5, i18n("P Value is %1 ", pValue), "green"); + printLine(5, i18n("P Value is %1 ", pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate")); - if (pValue <= significanceLevel) + if (pValue[0] <= significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(significanceLevel))); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /********************************Two Sample Paired ***************************************/ void HypothesisTestPrivate::performTwoSamplePairedTest(HypothesisTest::Test::Type test) { - clearTestView(); - if (columns.size() != 2) { printError("Inappropriate number of columns selected"); return; } for (int i = 0; i < 2; i++) { if ( !isNumericOrInteger(columns[0])) { printError("select only columns with numbers"); return; } } int n; double sum, mean, std; ErrorType errorCode = findStatsPaired(columns[0], columns[1], n, sum, mean, std); switch (errorCode) { case ErrorUnqualSize: { printError("both columns are having different sizes"); return; } case ErrorEmptyColumn: { printError("columns are empty"); return; } case NoError: break; } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", "difference", n, sum, mean, std }; statsTable = getHtmlTable(2, 5, rowMajor); if (std == 0) { printError("Standard deviation of the difference is 0"); return; } QString testName; int df = 0; switch (test) { case HypothesisTest::Test::Type::TTest: { - statisticValue = mean / (std / qSqrt(n)); + statisticValue[0] = mean / (std / qSqrt(n)); df = n - 1; testName = "T"; printLine(6, i18n("Degree of Freedom is %1name(), i18n("%1", populationMean), mean, std, df); + pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1", populationMean), mean, std, df)); currTestName = "

" + i18n("One Sample %1 Test for %2 vs %3", testName, columns[0]->name(), columns[1]->name()) + "

"; printLine(2, i18n("Significance level is %1 ", round(significanceLevel)), "blue"); - printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green"); - printLine(5, i18n("P Value is %1 ", pValue), "green"); + printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green"); + printLine(5, i18n("P Value is %1 ", pValue[0]), "green"); - if (pValue <= significanceLevel) + if (pValue[0] <= significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /******************************** One Sample ***************************************/ void HypothesisTestPrivate::performOneSampleTest(HypothesisTest::Test::Type test) { - clearTestView(); - if (columns.size() != 1) { printError("Inappropriate number of columns selected"); return; } if ( !isNumericOrInteger(columns[0])) { printError("select only columns with numbers"); return; } int n; double sum, mean, std; ErrorType errorCode = findStats(columns[0], n, sum, mean, std); switch (errorCode) { case ErrorEmptyColumn: { printError("column is empty"); return; } case NoError: break; case ErrorUnqualSize: { return; } } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", columns[0]->name(), n, sum, mean, std }; statsTable = getHtmlTable(2, 5, rowMajor); if (std == 0) { printError("Standard deviation is 0"); return; } QString testName; int df = 0; switch (test) { case HypothesisTest::Test::Type::TTest: { testName = "T"; - statisticValue = (mean - populationMean) / (std / qSqrt(n)); + statisticValue.append((mean - populationMean) / (std / qSqrt(n))); df = n - 1; printLine(6, i18n("Degree of Freedom is %1", df), "blue"); break; } case HypothesisTest::Test::Type::ZTest: { testName = "Z"; df = 0; - statisticValue = (mean - populationMean) / (std / qSqrt(n)); + statisticValue.append((mean - populationMean) / (std / qSqrt(n))); break; } case HypothesisTest::Test::Type::Anova: case HypothesisTest::Test::Type::NoneType: break; } - pValue = getPValue(test, statisticValue, columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df); + pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df)); currTestName = "

" + i18n("One Sample %1 Test for %2", testName, columns[0]->name()) + "

"; printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); - printLine(4, i18n("%1 Value is %2", testName, round(statisticValue)), "green"); - printLine(5, i18n("P Value is %1", pValue), "green"); + printLine(4, i18n("%1 Value is %2", testName, round(statisticValue[0])), "green"); + printLine(5, i18n("P Value is %1", pValue[0]), "green"); - if (pValue <= significanceLevel) + if (pValue[0] <= significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /*************************************One Way Anova***************************************/ // all standard variables and formulas are taken from this wikipedia page: // https://en.wikipedia.org/wiki/One-way_analysis_of_variance // b stands for b/w groups // w stands for within groups // np is number of partition i.e., number of classes void HypothesisTestPrivate::performOneWayAnova() { - clearTestView(); - int np, totalRows; + int np, totalRows; countPartitions(columns[0], np, totalRows); int* ni = new int[np]; double* sum = new double[np]; double* mean = new double[np]; double* std = new double[np]; QString* colNames = new QString[np]; QMap classnameToIndex; QString baseColName; if (isNumericOrInteger(columns[0])) baseColName = columns[0]->name(); findStatsCategorical(columns[0], columns[1], ni, sum, mean, std, classnameToIndex, np, totalRows); double yBar = 0; // overall mean double sB = 0; // sum of squares of (mean - overall_mean) between the groups int fB = 0; // degree of freedom between the groups double msB = 0; // mean sum of squares between the groups double sW = 0; // sum of squares of (value - mean of group) within the groups int fW = 0; // degree of freedom within the group double msW = 0; // mean sum of squares within the groups - double fValue = 0; - // now finding mean of each group; for (int i = 0; i < np; i++) yBar += mean[i]; yBar = yBar / np; for (int i = 0; i < np; i++) { sB += ni[i] * gsl_pow_2( ( mean[i] - yBar)); if (ni[i] > 1) sW += gsl_pow_2( std[i])*(ni[i] - 1); else sW += gsl_pow_2( std[i]); fW += ni[i] - 1; } fB = np - 1; msB = sB / fB; msW = sW / fW; - fValue = msB / msW; + statisticValue.append(msB / msW); - pValue = nsl_stats_fdist_p(fValue, static_cast(np-1), fW); + pValue.append(nsl_stats_fdist_p(statisticValue[0], static_cast(np-1), fW)); QMapIterator i(classnameToIndex); while (i.hasNext()) { i.next(); colNames[i.value()-1] = baseColName + " " + i.key(); } // now printing the statistics and result; int rowCount = np + 1, columnCount = 5; QVariant* rowMajor = new QVariant[rowCount*columnCount]; // header data; rowMajor[0] = ""; rowMajor[1] = "Ni"; rowMajor[2] = "Sum"; rowMajor[3] = "Mean"; rowMajor[4] = "Std"; // table data for (int row_i = 1; row_i < rowCount ; row_i++) { rowMajor[row_i*columnCount] = colNames[row_i - 1]; rowMajor[row_i*columnCount + 1] = ni[row_i - 1]; rowMajor[row_i*columnCount + 2] = sum[row_i - 1]; rowMajor[row_i*columnCount + 3] = mean[row_i - 1]; rowMajor[row_i*columnCount + 4] = std[row_i - 1]; } statsTable = "

" + i18n("Group Summary Statistics") + "

"; statsTable += getHtmlTable(rowCount, columnCount, rowMajor); statsTable += getLine(""); statsTable += getLine(""); statsTable += "

" + i18n("Grand Summary Statistics") + "

"; statsTable += getLine(""); statsTable += getLine(i18n("Overall Mean is %1", round(yBar))); rowCount = 4; columnCount = 3; rowMajor->clear(); rowMajor[0] = ""; rowMajor[1] = "Between Groups"; rowMajor[2] = "Within Groups"; int baseIndex = 0; baseIndex = 1 * columnCount; rowMajor[baseIndex + 0] = "Sum of Squares"; rowMajor[baseIndex + 1] = sB; rowMajor[baseIndex + 2] = sW; baseIndex = 2 * columnCount; rowMajor[baseIndex + 0] = "Degree of Freedom"; rowMajor[baseIndex + 1] = fB; rowMajor[baseIndex + 2] = fW; baseIndex = 3 * columnCount; rowMajor[baseIndex + 0] = "Mean Square Value"; rowMajor[baseIndex + 1] = msB; rowMajor[baseIndex + 2] = msW; statsTable += getHtmlTable(rowCount, columnCount, rowMajor); delete[] ni; delete[] sum; delete[] mean; delete[] std; delete[] colNames; - printLine(1, i18n("F Value is %1", round(fValue)), "green"); - printLine(2, i18n("P Value is %1 ", pValue), "green"); + printLine(1, i18n("F Value is %1", round(statisticValue[0])), "green"); + printLine(2, i18n("P Value is %1 ", pValue[0]), "green"); - if (pValue <= significanceLevel) + if (pValue[0] <= significanceLevel) printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); else printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /*************************************Two Way Anova***************************************/ // all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf //TODO: suppress warning of variable length array are a C99 feature. -//TODO: changed int mean to double mean; +//TODO: add assumptions verification option +//TODO: add tail option (if needed) void HypothesisTestPrivate::performTwoWayAnova() { - clearTestView(); int np_a, totalRows_a; int np_b, totalRows_b; countPartitions(columns[0], np_a, totalRows_a); countPartitions(columns[1], np_b, totalRows_b); double groupMean[np_a][np_b]; int replicates[np_a][np_b]; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) { groupMean[i][j] = 0; replicates[i][j] = 0; } if (totalRows_a != totalRows_b) { printError("There is missing data in atleast one of the rows"); return; } QMap catToNumber_a; QMap catToNumber_b; int partitionNumber_a = 1; int partitionNumber_b = 1; for (int i = 0; i < totalRows_a; i++) { QString name_a = columns[0]->textAt(i); QString name_b = columns[1]->textAt(i); double value = columns[2]->valueAt(i); if (catToNumber_a[name_a] == 0) { catToNumber_a[name_a] = partitionNumber_a; partitionNumber_a++; } if (catToNumber_b[name_b] == 0) { catToNumber_b[name_b] = partitionNumber_b; partitionNumber_b++; } groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += value; replicates[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += 1; } int replicate = replicates[0][0]; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) { if (replicates[i][j] == 0) { printError("Dataset should have atleast one data value corresponding to each feature combination"); return; } if (replicates[i][j] != replicate) { printError("Number of experiments perfomed for each combination of levels
" "between Independet Var.1 and Independent Var.2 must be equal"); return; } groupMean[i][j] /= replicates[i][j]; } - for (int i = 0; i < np_a; i++) - for (int j = 0; j < np_b; j++) - groupMean[i][j] = int(groupMean[i][j]); +// for (int i = 0; i < np_a; i++) +// for (int j = 0; j < np_b; j++) +// groupMean[i][j] = int(groupMean[i][j]); double ss_within = 0; for (int i = 0; i < totalRows_a; i++) { QString name_a = columns[0]->textAt(i); QString name_b = columns[1]->textAt(i); double value = columns[2]->valueAt(i); ss_within += gsl_pow_2(value - groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1]); } int df_within = (replicate - 1) * np_a * np_b; double ms_within = ss_within / df_within; double mean_a[np_a]; double mean_b[np_b]; for (int i = 0; i < np_a; i++) { for (int j = 0; j < np_b; j++) { mean_a[i] += groupMean[i][j] / np_b; mean_b[j] += groupMean[i][j] / np_a; } } double mean = 0; for (int i = 0; i < np_a; i++) mean += mean_a[i] / np_a; - QDEBUG("ss_within is " << ss_within); - QDEBUG("df_within is " << df_within); - QDEBUG("ms_within is " << ms_within); + double ss_a = 0; for (int i = 0; i < np_a; i++) - QDEBUG("mean_a is " << mean_a[i]); + ss_a += gsl_pow_2(mean_a[i] - mean); + ss_a *= replicate * np_b; + + int df_a = np_a - 1; + double ms_a = ss_a / df_a; + + double ss_b = 0; for (int i = 0; i < np_b; i++) - QDEBUG("mean_b is " << mean_b[i]); + ss_b += gsl_pow_2(mean_b[i] - mean); + ss_b *= replicate * np_a; + + int df_b = np_b - 1; + double ms_b = ss_b / df_b; + + double ss_interaction = 0; + + for (int i = 0; i < np_a; i++) + for (int j = 0; j < np_b; j++) + ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean); + ss_interaction *= replicate; + int df_interaction = (np_a - 1) * (np_b - 1); + double ms_interaction = ss_interaction / df_interaction; QString partitionNames_a[np_a]; QString partitionNames_b[np_b]; QMapIterator itr_a(catToNumber_a); while (itr_a.hasNext()) { itr_a.next(); partitionNames_a[itr_a.value()-1] = itr_a.key(); } QMapIterator itr_b(catToNumber_b); while (itr_b.hasNext()) { itr_b.next(); partitionNames_b[itr_b.value()-1] = itr_b.key(); } // printing table; // cell constructor structure; data, level, rowSpanCount, columnSpanCount, isHeader; QList rowMajor; rowMajor.append(new Cell("", 0, true, 2, 1)); for (int i = 0; i < np_b; i++) rowMajor.append(new Cell(partitionNames_b[i], 0, true, 1, 2)); rowMajor.append(new Cell("Mean", 0, true, 2)); for (int i = 0; i < np_b; i++) { rowMajor.append(new Cell("Mean", 1, true)); rowMajor.append(new Cell("Replicate", 1, true)); } int level = 2; for (int i = 0; i < np_a; i++) { rowMajor.append(new Cell(partitionNames_a[i], level, true)); for (int j = 0; j < np_b; j++) { - rowMajor.append(new Cell(groupMean[i][j], level)); + rowMajor.append(new Cell(round(groupMean[i][j]), level)); rowMajor.append(new Cell(replicates[i][j], level)); } - rowMajor.append(new Cell(mean_a[i], level)); + rowMajor.append(new Cell(round(mean_a[i]), level)); level++; } rowMajor.append(new Cell("Mean", level, true)); for (int i = 0; i < np_b; i++) - rowMajor.append(new Cell(mean_b[i], level, false, 1, 2)); - rowMajor.append(new Cell(mean, level)); + rowMajor.append(new Cell(round(mean_b[i]), level, false, 1, 2)); + rowMajor.append(new Cell(round(mean), level)); statsTable = "

" + i18n("Contingency Table") + "

"; statsTable += getHtmlTable3(rowMajor); -// QDEBUG(""); -// QDEBUG(""); -// QDEBUG(statsTable); + statsTable += "
"; + statsTable += "

" + i18n("results table") + "

"; + + rowMajor.clear(); + level = 0; + rowMajor.append(new Cell("", level, true)); + rowMajor.append(new Cell("SS", level, true)); + rowMajor.append(new Cell("DF", level, true)); + rowMajor.append(new Cell("MS", level, true)); + + level++; + rowMajor.append(new Cell(columns[0]->name(), level, true)); + rowMajor.append(new Cell(round(ss_a), level)); + rowMajor.append(new Cell(df_a, level)); + rowMajor.append(new Cell(round(ms_a), level)); + + level++; + rowMajor.append(new Cell(columns[1]->name(), level, true)); + rowMajor.append(new Cell(round(ss_b), level)); + rowMajor.append(new Cell(df_b, level)); + rowMajor.append(new Cell(round(ms_b), level)); + + level++; + rowMajor.append(new Cell("Interaction", level, true)); + rowMajor.append(new Cell(round(ss_interaction), level)); + rowMajor.append(new Cell(df_interaction, level)); + rowMajor.append(new Cell(round(ms_interaction), level)); + + level++; + rowMajor.append(new Cell("Within", level, true)); + rowMajor.append(new Cell(round(ss_within), level)); + rowMajor.append(new Cell(df_within, level)); + rowMajor.append(new Cell(round(ms_within), level)); + + statsTable += getHtmlTable3(rowMajor); + + double fValue_a = ms_a / ms_within; + double fValue_b = ms_b / ms_within; + double fValue_interaction = ms_interaction / ms_within; + + double pValue_a = nsl_stats_fdist_p(fValue_a, static_cast(np_a - 1), df_a); + double pValue_b = nsl_stats_fdist_p(fValue_b, static_cast(np_b - 1), df_b); + + printLine(0, "F(df" + columns[0]->name() + ", dfwithin) is " + round(fValue_a), "blue"); + printLine(1, "F(df" + columns[1]->name() + ", dfwithin) is " + round(fValue_b), "blue"); + printLine(2, "F(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); + + printLine(4, "P(df" + columns[0]->name() + ", dfwithin) is " + round(pValue_a), "blue"); + printLine(5, "P(df" + columns[1]->name() + ", dfwithin) is " + round(pValue_b), "blue"); +// printLine(2, "P(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); + + statisticValue.append(fValue_a); + statisticValue.append(fValue_b); + statisticValue.append(fValue_interaction); + + pValue.append(pValue_a); + pValue.append(pValue_b); + return; } /**************************************Levene Test****************************************/ // Some reference to local variables. // np = number of partitions // df = degree of fredom // totalRows = total number of rows in column // these variables are taken from: https://en.wikipedia.org/wiki/Levene%27s_test // yiBar = mean of ith group; // Zij = |Yij - yiBar| // ziBar = mean of Zij for group i // ziBarBar = mean for all zij // ni = number of elements in group i void HypothesisTestPrivate::performLeveneTest(bool categoricalVariable) { - clearTestView(); - if (columns.size() != 2) { printError("Inappropriate number of columns selected"); return; } int np = 0; int n = 0; if (!categoricalVariable && isNumericOrInteger(columns[0])) np = columns.size(); else countPartitions(columns[0], np, n); if (np < 2) { printError("Select atleast two columns / classes"); return; } double* yiBar = new double[np]; double* ziBar = new double[np]; double ziBarBar = 0; double* ni = new double[np]; for (int i = 0; i < np; i++) { yiBar[i] = 0; ziBar[i] = 0; ni[i] = 0; } double fValue; int df = 0; int totalRows = 0; QString* colNames = new QString[np]; if (!categoricalVariable && isNumericOrInteger(columns[0])) { totalRows = columns[0]->rowCount(); double value = 0; for (int j = 0; j < totalRows; j++) { int numberNaNCols = 0; for (int i = 0; i < np; i++) { value = columns[i]->valueAt(j); if (std::isnan(value)) { numberNaNCols++; continue; } yiBar[i] += value; ni[i]++; n++; } if (numberNaNCols == np) { totalRows = j; break; } } for (int i = 0; i < np; i++) { if (ni[i] > 0) yiBar[i] = yiBar[i] / ni[i]; else { printError("One of the selected columns is empty"); return; } } for (int j = 0; j < totalRows; j++) { for (int i = 0; i < np; i++) { value = columns[i]->valueAt(j); if (!(std::isnan(value))) ziBar[i] += fabs(value - yiBar[i]); } } for (int i = 0; i < np; i++) { ziBarBar += ziBar[i]; if (ni[i] > 0) ziBar[i] = ziBar[i] / ni[i]; } ziBarBar = ziBarBar / n; double numberatorValue = 0; double denominatorValue = 0; for (int j = 0; j < totalRows; j++) { for (int i = 0; i < np; i++) { value = columns[i]->valueAt(j); if (!(std::isnan(value))) { double zij = fabs(value - yiBar[i]); denominatorValue += gsl_pow_2( (zij - ziBar[i])); } } } if (denominatorValue <= 0) { printError( i18n("Denominator value is %1", denominatorValue)); return; } for (int i = 0; i < np; i++) { colNames[i] = columns[i]->name(); numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar)); } fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue); } else { QMap classnameToIndex; AbstractColumn::ColumnMode originalColMode = columns[0]->columnMode(); columns[0]->setColumnMode(AbstractColumn::Text); int partitionNumber = 1; QString name; double value; int classIndex; for (int j = 0; j < n; j++) { name = columns[0]->textAt(j); value = columns[1]->valueAt(j); if (std::isnan(value)) { n = j; break; } if (classnameToIndex[name] == 0) { classnameToIndex[name] = partitionNumber; partitionNumber++; } classIndex = classnameToIndex[name]-1; ni[classIndex]++; yiBar[classIndex] += value; } for (int i = 0; i < np; i++) { if (ni[i] > 0) yiBar[i] = yiBar[i] / ni[i]; else { printError("One of the selected columns is empty"); return; } } for (int j = 0; j < n; j++) { name = columns[0]->textAt(j); value = columns[1]->valueAt(j); classIndex = classnameToIndex[name] - 1; ziBar[classIndex] += fabs(value - yiBar[classIndex]); } for (int i = 0; i < np; i++) { ziBarBar += ziBar[i]; ziBar[i] = ziBar[i] / ni[i]; } ziBarBar = ziBarBar / n; double numberatorValue = 0; double denominatorValue = 0; for (int j = 0; j < n; j++) { name = columns[0]->textAt(j); value = columns[1]->valueAt(j); classIndex = classnameToIndex[name] - 1; double zij = fabs(value - yiBar[classIndex]); denominatorValue += gsl_pow_2( (zij - ziBar[classIndex])); } for (int i = 0; i < np; i++) numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar)); if (denominatorValue <= 0) { printError( "number of data points is less or than equal to number of categorical variables"); return; } fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue); QMapIterator i(classnameToIndex); while (i.hasNext()) { i.next(); colNames[i.value()-1] = columns[0]->name() + " " + i.key(); } columns[0]->setColumnMode(originalColMode); } df = n - np; // now making the stats table. int rowCount = np+1; int columnCount = 4; QVariant* rowMajor = new QVariant[rowCount*columnCount]; // header data; rowMajor[0] = ""; rowMajor[1] = "Ni"; rowMajor[2] = "yiBar"; rowMajor[3] = "ziBar"; // table data for (int row_i = 1; row_i < rowCount; row_i++) { rowMajor[row_i*columnCount] = colNames[row_i-1]; rowMajor[row_i*columnCount + 1] = ni[row_i-1]; rowMajor[row_i*columnCount + 2] = yiBar[row_i-1]; rowMajor[row_i*columnCount + 3] = ziBar[row_i-1]; } statsTable = getHtmlTable(rowCount, columnCount, rowMajor); delete[] rowMajor; delete[] yiBar; delete[] ziBar; delete[] ni; - pValue = nsl_stats_fdist_p(fValue, static_cast(np-1), df); + pValue.append(nsl_stats_fdist_p(fValue, static_cast(np-1), df)); printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue"); printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue"); printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue"); printLine(4, i18n("F Value is %1 ", round(fValue)), "green"); - printLine(5, i18n("P Value is %1 ", pValue), "green"); + printLine(5, i18n("P Value is %1 ", pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); - if (pValue <= significanceLevel) { + if (pValue[0] <= significanceLevel) { printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel)); printLine(8, "Requirement for homogeneity is not met", "red"); } else { printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); printLine(8, "Requirement for homogeneity is met", "green"); } + statisticValue.append(fValue); return; } /***************************************Helper Functions*************************************/ QString HypothesisTestPrivate::round(QVariant number, int precision) { if (number.userType() == QMetaType::Double || number.userType() == QMetaType::Float) { double multiplierPrecision = qPow(10, precision); int tempNum = int(number.toDouble()*multiplierPrecision*10); if (tempNum % 10 < 5) return QString::number((tempNum/10) / multiplierPrecision); else return QString::number((tempNum/10 + 1) / multiplierPrecision); } return i18n("%1", number.toString()); } bool HypothesisTestPrivate::isNumericOrInteger(Column* column) { return (column->columnMode() == AbstractColumn::Numeric || column->columnMode() == AbstractColumn::Integer); } HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStats(const Column* column, int& count, double& sum, double& mean, double& std) { sum = 0; mean = 0; std = 0; count = column->rowCount(); for (int i = 0; i < count; i++) { double row = column->valueAt(i); if ( std::isnan(row)) { count = i; break; } sum += row; } if (count < 1) return HypothesisTestPrivate::ErrorEmptyColumn; mean = sum / count; for (int i = 0; i < count; i++) { double row = column->valueAt(i); std += gsl_pow_2( (row - mean)); } if (count > 1) std = std / (count-1); std = qSqrt(std); return HypothesisTestPrivate::NoError; } HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStatsPaired(const Column* column1, const Column* column2, int& count, double& sum, double& mean, double& std) { sum = 0; mean = 0; std = 0; int count1 = column1->rowCount(); int count2 = column2->rowCount(); count = qMin(count1, count2); double cell1, cell2; for (int i = 0; i < count; i++) { cell1 = column1->valueAt(i); cell2 = column2->valueAt(i); if (std::isnan(cell1) || std::isnan(cell2)) { if (std::isnan(cell1) && std::isnan(cell2)) count = i; else return HypothesisTestPrivate::ErrorUnqualSize; break; } sum += cell1 - cell2; } if (count < 1) return HypothesisTestPrivate::ErrorEmptyColumn; mean = sum / count; double row; for (int i = 0; i < count; i++) { cell1 = column1->valueAt(i); cell2 = column2->valueAt(i); row = cell1 - cell2; std += gsl_pow_2( (row - mean)); } if (count > 1) std = std / (count-1); std = qSqrt(std); return HypothesisTestPrivate::NoError; } void HypothesisTestPrivate::countPartitions(Column* column, int& np, int& totalRows) { totalRows = column->rowCount(); np = 0; QString cellValue; QMap discoveredCategoricalVar; AbstractColumn::ColumnMode originalColMode = column->columnMode(); column->setColumnMode(AbstractColumn::Text); for (int i = 0; i < totalRows; i++) { cellValue = column->textAt(i); if (cellValue.isEmpty()) { totalRows = i; break; } if (discoveredCategoricalVar[cellValue]) continue; discoveredCategoricalVar[cellValue] = true; np++; } column->setColumnMode(originalColMode); } HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStatsCategorical(Column* column1, Column* column2, int n[], double sum[], double mean[], double std[], QMap& colName, const int& np, const int& totalRows) { Column* columns[] = {column1, column2}; for (int i = 0; i < np; i++) { n[i] = 0; sum[i] = 0; mean[i] = 0; std[i] = 0; } AbstractColumn::ColumnMode originalColMode = columns[0]->columnMode(); columns[0]->setColumnMode(AbstractColumn::Text); int partitionNumber = 1; for (int i = 0; i < totalRows; i++) { QString name = columns[0]->textAt(i); double value = columns[1]->valueAt(i); if (std::isnan(value)) { columns[0]->setColumnMode(originalColMode); return HypothesisTestPrivate::ErrorUnqualSize; } if (colName[name] == 0) { colName[name] = partitionNumber; partitionNumber++; } n[colName[name]-1]++; sum[colName[name]-1] += value; } for (int i = 0; i < np; i++) mean[i] = sum[i] / n[i]; for (int i = 0; i < totalRows; i++) { QString name = columns[0]->textAt(i); double value = columns[1]->valueAt(i); std[colName[name]-1] += gsl_pow_2( (value - mean[colName[name]-1])); } for (int i = 0; i < np; i++) { if (n[i] > 1) std[i] = std[i] / (n[i] - 1); std[i] = qSqrt(std[i]); } columns[0]->setColumnMode(originalColMode); if (isNumericOrInteger(columns[0])) { } return HypothesisTestPrivate::NoError; } //TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol; // TODO: check for correctness between: for TestZ with TailTwo -// pValue = 2*gsl_cdf_tdist_P(value, df) v/s -// pValue = gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df); +// pValue.append(2*gsl_cdf_tdist_P(value, df) v/s +// pValue.append(gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df); double HypothesisTestPrivate::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) { switch (test) { case HypothesisTest::Test::Type::TTest: { switch (tailType) { case HypothesisTest::Test::Tail::Negative: { - pValue = gsl_cdf_tdist_P(value, df); + pValue.append(gsl_cdf_tdist_P(value, df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Positive: { value *= -1; - pValue = gsl_cdf_tdist_P(value, df); + pValue.append(gsl_cdf_tdist_P(value, df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Two: { - pValue = 2.*gsl_cdf_tdist_P(-fabs(value), df); + pValue.append(2.*gsl_cdf_tdist_P(-fabs(value), df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); break; } } break; } case HypothesisTest::Test::Type::ZTest: { switch (tailType) { case HypothesisTest::Test::Tail::Negative: { - pValue = gsl_cdf_gaussian_P(value - mean, sp); + pValue.append(gsl_cdf_gaussian_P(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Positive: { value *= -1; - pValue = nsl_stats_tdist_p(value - mean, sp); + pValue.append(nsl_stats_tdist_p(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Two: { - pValue = 2.*gsl_cdf_gaussian_P(value - mean, sp); + pValue.append(2.*gsl_cdf_gaussian_P(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); break; } } break; } case HypothesisTest::Test::Type::Anova: case HypothesisTest::Test::Type::NoneType: break; } - if (pValue > 1) + if (pValue[0] > 1) return 1; - return pValue; + return pValue[0]; } int HypothesisTestPrivate::setSpanValues(HypothesisTestPrivate::Node* root, int& totalLevels) { if (root == nullptr) { totalLevels = 0; return 0; } int val = 0; int level = 0; int maxLevel = 0; for (int i = 0; i < root->children.size(); i++) { val += setSpanValues(root->children[i], level); maxLevel = std::max(level, maxLevel); } totalLevels = maxLevel + 1; if (val == 0) root->spanCount = 1; else root->spanCount = val; return root->spanCount; } QString HypothesisTestPrivate::getHtmlHeader(HypothesisTestPrivate::Node *root) { if (root == nullptr) return QString(); QString header; int totalLevels = 0; setSpanValues(root, totalLevels); totalLevels -= 1; root->level = 0; QQueue nodeQueue; for (int i = 0; i < root->children.size(); i++) { Node* child = root->children[i]; child->level = 1; nodeQueue.enqueue(child); } int prevLevel = 1; header = " "; header += " "; while(!nodeQueue.isEmpty()) { Node* node = nodeQueue.dequeue(); int nodeLevel = node->level; for (int i = 0; i < node->children.size(); i++) { Node* child = node->children[i]; child->level = nodeLevel + 1; nodeQueue.enqueue(child); } if (nodeLevel != prevLevel) { prevLevel = nodeLevel; header += " "; header += " "; } header += " " + node->data + ""; } header += " "; return header; } QString HypothesisTestPrivate::getHtmlTable2(int rowCount, int columnCount, Node* columnHeaderRoot, QVariant* rowMajor) { if (rowCount < 1 || columnCount < 1) return QString(); QString table; table = "" ""; table += getHtmlHeader(columnHeaderRoot); for (int i = 0; i < rowCount; i++) { table += " "; table += " "; for (int j = 1; j < columnCount; j++) table += " "; table += " "; } table += "
" + round(rowMajor[i*columnCount]) + "" + round(rowMajor[i*columnCount + j]) + "
"; return table; } QString HypothesisTestPrivate::getHtmlTable(int row, int column, QVariant* rowMajor) { if (row < 1 || column < 1) return QString(); QString table; table = "" "" " "; QString bg = "tg-0pky"; bool pky = true; QString element; table += " "; for (int j = 0; j < column; j++) { element = rowMajor[j].toString(); table += " "; } table += " "; if (pky) bg = "tg-0pky"; else bg = "tg-btxf"; pky = !pky; for (int i = 1; i < row; i++) { table += " "; QString element = round(rowMajor[i*column]); table += " "; for (int j = 1; j < column; j++) { element = round(rowMajor[i*column+j]); table += " "; } table += " "; if (pky) bg = "tg-0pky"; else bg = "tg-btxf"; pky = !pky; } table += "
" + i18n("%1", element) + "
" + i18n("%1", element) + "" + i18n("%1", element) + "
"; return table; } QString HypothesisTestPrivate::getHtmlTable3(const QList& rowMajor) { int rowMajorSize = rowMajor.size(); if (rowMajorSize == 0) return QString(); QString table; table = ""; + ""; + + table += "
"; table += " "; int prevLevel = 0; for (int i = 0; i < rowMajorSize; i++) { Cell* currCell = rowMajor[i]; if (currCell->level != prevLevel) { table += " "; table += " "; prevLevel = currCell->level; } QString cellStartTag = ""; + table += "
isHeader) { cellStartTag = "" + i18n("%1", currCell->data) + cellEndTag; } table += "
"; return table; } QString HypothesisTestPrivate::getLine(const QString& msg, const QString& color) { return "

" + i18n("%1", msg) + "

"; } void HypothesisTestPrivate::printLine(const int& index, const QString& msg, const QString& color) { if (index < 0 || index >= 10) return; resultLine[index]->setText(getLine(msg, color)); return; } void HypothesisTestPrivate::printTooltip(const int &index, const QString &msg) { if (index < 0 || index >= 10) return; resultLine[index]->setToolTip(i18n("%1", msg)); } void HypothesisTestPrivate::printError(const QString& errorMsg) { printLine(0, errorMsg, "red"); } -void HypothesisTestPrivate::clearSummaryLayout() { - for (int i = 0; i < 10; i++) - resultLine[i]->clear(); -} - -void HypothesisTestPrivate::clearTestView() { - statsTable = ""; - clearSummaryLayout(); -} - /********************************************************************************** * virtual functions implementations * ********************************************************************************/ /*! Saves as XML. */ void HypothesisTest::save(QXmlStreamWriter* writer) const { writer->writeStartElement("hypothesisTest"); writeBasicAttributes(writer); writeCommentElement(writer); writer->writeEndElement(); } /*! Loads from XML. */ bool HypothesisTest::load(XmlStreamReader* reader, bool preview) { Q_UNUSED(preview); if (!readBasicAttributes(reader)) return false; return !reader->hasError(); } Spreadsheet *HypothesisTest::dataSourceSpreadsheet() const { return d->dataSourceSpreadsheet; } bool HypothesisTest::exportView() const { return true; } bool HypothesisTest::printView() { return true; } bool HypothesisTest::printPreview() const { return true; } /*! Constructs a primary view on me. This method may be called multiple times during the life time of an Aspect, or it might not get called at all. Aspects must not depend on the existence of a view for their operation. */ QWidget* HypothesisTest::view() const { if (!m_partView) { m_view = new HypothesisTestView(const_cast(this)); m_partView = m_view; } return m_partView; } /*! Returns a new context menu. The caller takes ownership of the menu. */ QMenu* HypothesisTest::createContextMenu() { QMenu* menu = AbstractPart::createContextMenu(); // Q_ASSERT(menu); // emit requestProjectContextMenu(menu); return menu; } diff --git a/src/backend/hypothesisTest/HypothesisTest.h b/src/backend/hypothesisTest/HypothesisTest.h index 33aef536d..3288fca9c 100644 --- a/src/backend/hypothesisTest/HypothesisTest.h +++ b/src/backend/hypothesisTest/HypothesisTest.h @@ -1,126 +1,126 @@ /*************************************************************************** File : HypothesisTest.h Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef HYPOTHESISTEST_H #define HYPOTHESISTEST_H #include "backend/core/AbstractPart.h" #include "backend/lib/macros.h" class HypothesisTestPrivate; class HypothesisTestView; class Spreadsheet; class QString; class Column; class QVBoxLayout; class QLabel; class HypothesisTest : public AbstractPart { Q_OBJECT public: explicit HypothesisTest(const QString& name); ~HypothesisTest() override; struct Test { enum Type { NoneType = 0, TTest = 1 << 0, ZTest = 1 << 1, Anova = 1 << 2 }; enum SubType { NoneSubType = 0, TwoSampleIndependent = 1 << 0, TwoSamplePaired = 1 << 1, OneSample = 1 << 2, OneWay = 1 << 3, TwoWay = 1 << 4 }; enum Tail {Positive, Negative, Two}; Type type = NoneType; SubType subtype = NoneSubType; Tail tail; }; enum DataSourceType {DataSourceSpreadsheet, DataSourceDatabase}; void setDataSourceType(DataSourceType type); DataSourceType dataSourceType() const; void setDataSourceSpreadsheet(Spreadsheet* spreadsheet); Spreadsheet* dataSourceSpreadsheet() const; void setColumns(const QVector& cols); void setColumns(QStringList cols); QStringList allColumns(); void setPopulationMean(QVariant populationMean); void setSignificanceLevel(QVariant alpha); QString testName(); QString statsTable(); void performTest(Test m_test, bool categoricalVariable = true, bool equalVariance = true); // void performTwoSampleIndependentTTest(bool categorical_variable, bool equal_variance); // void performTwoSamplePairedTTest(); // void performOneSampleTTest(); // void performTwoSampleIndependentZTest(); // void performTwoSamplePairedZTest(); // void performOneSampleZTest(); // void performOneWayAnova(); void performLeveneTest(bool categorical_variable); - double statisticValue(); - double pValue(); + QList statisticValue(); + QList pValue(); QVBoxLayout* summaryLayout(); //virtual methods // QIcon icon() const override; QMenu* createContextMenu() override; QWidget* view() const override; bool exportView() const override; bool printView() override; bool printPreview() const override; void save(QXmlStreamWriter*) const override; bool load(XmlStreamReader*, bool preview) override; private: HypothesisTestPrivate* const d; mutable HypothesisTestView* m_view{nullptr}; friend class HypothesisTestPrivate; signals: void changed(); void requestProjectContextMenu(QMenu*); void dataSourceTypeChanged(HypothesisTest::DataSourceType); void dataSourceSpreadsheetChanged(Spreadsheet*); }; #endif // HypothesisTest_H diff --git a/src/backend/hypothesisTest/HypothesisTestPrivate.h b/src/backend/hypothesisTest/HypothesisTestPrivate.h index 16c09b555..35c9b93be 100644 --- a/src/backend/hypothesisTest/HypothesisTestPrivate.h +++ b/src/backend/hypothesisTest/HypothesisTestPrivate.h @@ -1,128 +1,125 @@ /*************************************************************************** File : HypothesisTestPrivate.h Project : LabPlot Description : Private members of Hypothesis Test -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef HYPOTHESISTESTPRIVATE_H #define HYPOTHESISTESTPRIVATE_H #include class QStandardItemModel; class HypothesisTestPrivate { public: explicit HypothesisTestPrivate(HypothesisTest*); virtual ~HypothesisTestPrivate(); struct Node { QString data; int spanCount; int level; QVector children; void addChild(Node* child) { children.push_back(child); } }; struct Cell { QString data; int level; int rowSpanCount; int columnSpanCount; bool isHeader; Cell(QVariant data = "", int level = 0, bool isHeader = false, int rowSpanCount = 1, int columnSpanCount = 1) { this->data = data.toString(); this->level = level; this->isHeader = isHeader; this->rowSpanCount = rowSpanCount; this->columnSpanCount = columnSpanCount; } }; enum ErrorType {ErrorUnqualSize, ErrorEmptyColumn, NoError}; QString name() const; void setDataSourceSpreadsheet(Spreadsheet* spreadsheet); void setColumns(QStringList cols); void performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable = false, bool equalVariance = true); void performTwoSamplePairedTest(HypothesisTest::Test::Type test); void performOneSampleTest(HypothesisTest::Test::Type test); void performOneWayAnova(); void performTwoWayAnova(); void performLeveneTest(bool categoricalVariable); HypothesisTest* const q; HypothesisTest::DataSourceType dataSourceType{HypothesisTest::DataSourceSpreadsheet}; Spreadsheet* dataSourceSpreadsheet{nullptr}; QVector columns; QStringList allColumns; // int rowCount{0}; // int columnCount{0}; QString currTestName{"Result Table"}; double populationMean; double significanceLevel; QString statsTable; HypothesisTest::Test::Tail tailType; - double pValue{0}; - double statisticValue{0}; + QList pValue; + QList statisticValue; QVBoxLayout* summaryLayout{nullptr}; QLabel* resultLine[10]; private: bool isNumericOrInteger(Column* column); QString round(QVariant number, int precision = 3); void countPartitions(Column* column, int& np, int& totalRows); ErrorType findStats(const Column* column,int& count, double& sum, double& mean, double& std); ErrorType findStatsPaired(const Column* column1, const Column* column2, int& count, double& sum, double& mean, double& std); ErrorType findStatsCategorical(Column* column1, Column* column2, int n[], double sum[], double mean[], double std[], QMap& colName, const int& np, const int& totalRows); double getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2name, const double mean, const double sp, const int df); int setSpanValues(Node* root, int& totalLevels); QString getHtmlHeader(Node* root); QString getHtmlTable2(int rowCount, int columnCount, Node* columnHeaderRoot, QVariant* rowMajor); QString getHtmlTable(int row, int column, QVariant* rowMajor); QString getHtmlTable3(const QList& rowMajor); QString getLine(const QString& msg, const QString& color = "black"); void printLine(const int& index, const QString& msg, const QString& color = "black"); void printTooltip(const int& index, const QString& msg); void printError(const QString& errorMsg); - void clearTestView(); - - void clearSummaryLayout(); bool m_dbCreated{false}; }; #endif diff --git a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp.autosave b/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp.autosave deleted file mode 100644 index 7bd01402f..000000000 --- a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp.autosave +++ /dev/null @@ -1,876 +0,0 @@ -/*************************************************************************** - File : HypothesisTestDock.cpp - Project : LabPlot - Description : widget for hypothesis test properties - -------------------------------------------------------------------- - Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) - - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the Free Software * - * Foundation, Inc., 51 Franklin Street, Fifth Floor, * - * Boston, MA 02110-1301 USA * - * * - ***************************************************************************/ - -#include "HypothesisTestDock.h" -#include "backend/core/AspectTreeModel.h" -#include "backend/core/AbstractAspect.h" -#include "backend/core/Project.h" -#include "backend/spreadsheet/Spreadsheet.h" -#include "commonfrontend/widgets/TreeViewComboBox.h" -#include "kdefrontend/datasources/DatabaseManagerDialog.h" -#include "kdefrontend/datasources/DatabaseManagerWidget.h" -#include "kdefrontend/TemplateHandler.h" - -#include -#include -#include - -#include -#include -#include - -#include -#include -/*! - \class HypothesisTestDock - \brief Provides a dock (widget) for hypothesis testing: - \ingroup kdefrontend -*/ - -//TODO: To add tooltips in docks for non obvious widgets. -//TODO: Add functionality for database along with spreadsheet. - -HypothesisTestDock::HypothesisTestDock(QWidget* parent) : QWidget(parent) { - //QDEBUG("in hypothesis test constructor "); - ui.setupUi(this); - - ui.cbDataSourceType->addItem(i18n("Spreadsheet")); - ui.cbDataSourceType->addItem(i18n("Database")); - - cbSpreadsheet = new TreeViewComboBox; - ui.gridLayout->addWidget(cbSpreadsheet, 5, 4, 1, 3); - - ui.bDatabaseManager->setIcon(QIcon::fromTheme("network-server-database")); - ui.bDatabaseManager->setToolTip(i18n("Manage connections")); - m_configPath = QStandardPaths::standardLocations(QStandardPaths::AppDataLocation).constFirst() + "sql_connections"; - - // adding item to tests and testtype combo box; - - ui.cbTest->addItem( i18n("T Test"), HypothesisTest::Test::Type::TTest); - ui.cbTest->addItem( i18n("Z Test"), HypothesisTest::Test::Type::ZTest); - ui.cbTest->addItem( i18n("ANOVA"), HypothesisTest::Test::Type::Anova); - - ui.lPopulationSigma->setText( UTF8_QSTRING("σ")); - - // making all test blocks invisible at starting. - ui.pbLeveneTest->hide(); - ui.lCategorical->hide(); - ui.chbCategorical->hide(); - ui.lCol1->hide(); - ui.cbCol1->hide(); - ui.lCol2->hide(); - ui.cbCol2->hide(); - ui.lCol3->hide(); - ui.cbCol3->hide(); - ui.lEqualVariance->hide(); - ui.chbEqualVariance->hide(); - ui.chbEqualVariance->setChecked(true); - ui.lPopulationSigma->hide(); - ui.lPopulationSigma->setToolTip( i18n("Sigma of Population

" - "Hint: Z-Test if preffered over T-Test if this is known")); - ui.chbPopulationSigma->hide(); - ui.lePopulationSigma->hide(); - ui.pbPerformTest->setEnabled(false); - ui.rbH1OneTail2->hide(); - ui.rbH1OneTail1->hide(); - ui.rbH1TwoTail->hide(); - ui.rbH0OneTail1->hide(); - ui.rbH0OneTail2->hide(); - ui.rbH0TwoTail->hide(); - ui.lH0->hide(); - ui.lH1->hide(); - - QString mu = UTF8_QSTRING("μ"); - QString mu0 = UTF8_QSTRING("μₒ"); - - // radio button for null and alternate hypothesis - // for alternative hypothesis (h1) - // one_tail_1 is mu > mu0; one_tail_2 is mu < mu0; two_tail = mu != mu0; - - ui.rbH1OneTail1->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING(">"), mu0)); - ui.rbH1OneTail2->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("<"), mu0)); - ui.rbH1TwoTail->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("≠"), mu0)); - - ui.rbH0OneTail1->setText( i18n("%1 %2 %3",mu, UTF8_QSTRING("≤"), mu0)); - ui.rbH0OneTail2->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("≥"), mu0)); - ui.rbH0TwoTail->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("="), mu0)); - - ui.rbH0TwoTail->setEnabled(false); - ui.rbH0OneTail1->setEnabled(false); - ui.rbH0OneTail2->setEnabled(false); - - - // setting muo and alpha buttons - ui.lMuo->setText( i18n("%1", mu0)); - ui.lAlpha->setText( i18n("%1", UTF8_QSTRING("α"))); - ui.leMuo->setText( i18n("%1", m_populationMean)); - ui.leAlpha->setText( i18n("%1", m_significanceLevel)); - - ui.lMuo->hide(); - ui.lMuo->setToolTip( i18n("Population Mean")); - ui.lAlpha->hide(); - ui.lAlpha->setToolTip( i18n("Significance Level")); - ui.leMuo->hide(); - ui.leAlpha->hide(); - ui.pbPerformTest->setIcon(QIcon::fromTheme("run-build")); - - ui.leMuo->setText( i18n("%1", m_populationMean)); - ui.leAlpha->setText( i18n("%1", m_significanceLevel)); - - // readConnections(); - - // auto* style = ui.bAddRow->style(); - // ui.bAddRow->setIcon(style->standardIcon(QStyle::SP_ArrowRight)); - // ui.bAddRow->setToolTip(i18n("Add the selected field to rows")); - // ui.bRemoveRow->setIcon(style->standardIcon(QStyle::SP_ArrowLeft)); - // ui.bRemoveRow->setToolTip(i18n("Remove the selected field from rows")); - - // ui.bAddColumn->setIcon(style->standardIcon(QStyle::SP_ArrowRight)); - // ui.bAddColumn->setToolTip(i18n("Add the selected field to columns")); - // ui.bRemoveColumn->setIcon(style->standardIcon(QStyle::SP_ArrowLeft)); - // ui.bRemoveColumn->setToolTip(i18n("Remove the selected field from columns")); - - // //add/remove buttons only enabled if something was selected - // ui.bAddRow->setEnabled(false); - // ui.bRemoveRow->setEnabled(false); - // ui.bAddColumn->setEnabled(false); - // ui.bRemoveColumn->setEnabled(false); - - // connect(ui.leName, &QLineEdit::textChanged, this, &HypothesisTestDock::nameChanged); - // connect(ui.leComment, &QLineEdit::textChanged, this, &HypothesisTestDock::commentChanged); - connect(ui.cbDataSourceType, static_cast(&QComboBox::currentIndexChanged), - this, &HypothesisTestDock::dataSourceTypeChanged); - - connect(cbSpreadsheet, &TreeViewComboBox::currentModelIndexChanged, this, &HypothesisTestDock::spreadsheetChanged); - // connect(ui.cbConnection, static_cast(&QComboBox::currentIndexChanged), - // this, &HypothesisTestDock::connectionChanged); - // connect(ui.cbTable, static_cast(&QComboBox::currentIndexChanged), - // this, &HypothesisTestDock::tableChanged); - // connect(ui.bDatabaseManager, &QPushButton::clicked, this, &HypothesisTestDock::showDatabaseManager); - - // connect(ui.bAddRow, &QPushButton::clicked, this, &HypothesisTestDock::addRow); - // connect(ui.bRemoveRow, &QPushButton::clicked, this,&HypothesisTestDock::removeRow); - // connect(ui.bAddColumn, &QPushButton::clicked, this, &HypothesisTestDock::addColumn); - // connect(ui.bRemoveColumn, &QPushButton::clicked, this,&HypothesisTestDock::removeColumn); - - // connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::doTTest); - // connect(ui.cbCol2, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::doTTest); - - // connect(ui.lwFields, &QListWidget::itemSelectionChanged, this, [=]() { - // bool enabled = !ui.lwFields->selectedItems().isEmpty(); - // ui.bAddRow->setEnabled(enabled); - // ui.bAddColumn->setEnabled(enabled); - // }); - - // connect(ui.lwRows, &QListWidget::doubleClicked, this,&HypothesisTestDock::removeRow); - // connect(ui.lwRows, &QListWidget::itemSelectionChanged, this, [=]() { - // ui.bRemoveRow->setEnabled(!ui.lwRows->selectedItems().isEmpty()); - // }); - - // connect(ui.lwColumns, &QListWidget::doubleClicked, this,&HypothesisTestDock::removeColumn); - // connect(ui.lwColumns, &QListWidget::itemSelectionChanged, this, [=]() { - // ui.bRemoveColumn->setEnabled(!ui.lwColumns->selectedItems().isEmpty()); - // }); - - connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showTestType); - connect(ui.cbTestType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); - // connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); - // connect(ui.cbTestType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); - connect(ui.pbPerformTest, &QPushButton::clicked, this, &HypothesisTestDock::doHypothesisTest); - connect(ui.pbLeveneTest, &QPushButton::clicked, this, &HypothesisTestDock::performLeveneTest); - - - //connecting null hypothesis and alternate hypothesis radio button - connect(ui.rbH1OneTail1, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1OneTail1Toggled); - connect(ui.rbH1OneTail2, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1OneTail2Toggled); - connect(ui.rbH1TwoTail, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1TwoTailToggled); - - connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::col1IndexChanged); - connect(ui.chbCategorical, &QCheckBox::stateChanged, this, &HypothesisTestDock::changeCbCol2Label); - - connect(ui.chbPopulationSigma, &QCheckBox::stateChanged, this, &HypothesisTestDock::chbPopulationSigmaStateChanged); - - ui.cbTest->setCurrentIndex(0); - emit ui.cbTest->currentIndexChanged(0); - ui.cbTestType->setCurrentIndex(0); - emit ui.cbTestType->currentIndexChanged(0); -} - -void HypothesisTestDock::setHypothesisTest(HypothesisTest* HypothesisTest) { - //QDEBUG("in set hypothesis test"); - m_initializing = true; - m_hypothesisTest = HypothesisTest; - - m_aspectTreeModel = new AspectTreeModel(m_hypothesisTest->project()); - - QList list; - list << "Folder" << "Workbook" << "Spreadsheet" << "LiveDataSource"; - cbSpreadsheet->setTopLevelClasses(list); - - list.clear(); - list << "Spreadsheet" << "LiveDataSource"; - m_aspectTreeModel->setSelectableAspects(list); - - cbSpreadsheet->setModel(m_aspectTreeModel); - - //show the properties - ui.leName->setText(m_hypothesisTest->name()); - ui.leComment->setText(m_hypothesisTest->comment()); - ui.cbDataSourceType->setCurrentIndex(m_hypothesisTest->dataSourceType()); - if (m_hypothesisTest->dataSourceType() == HypothesisTest::DataSourceType::DataSourceSpreadsheet) - setModelIndexFromAspect(cbSpreadsheet, m_hypothesisTest->dataSourceSpreadsheet()); - // else - // ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(m_hypothesisTest->dataSourceConnection())); - - setColumnsComboBoxModel(m_hypothesisTest->dataSourceSpreadsheet()); - - this->dataSourceTypeChanged(ui.cbDataSourceType->currentIndex()); - - //setting rows and columns in combo box; - - //undo functions -// connect(m_hypothesisTest, SIGNAL(aspectDescriptionChanged(const AbstractAspect*)), this, SLOT(hypothesisTestDescriptionChanged(const AbstractAspect*))); - - m_initializing = false; - -} - -void HypothesisTestDock::showTestType() { - //QDEBUG("in show test type"); - m_test.type = HypothesisTest::Test::Type(ui.cbTest->currentData().toInt()); - - ui.cbTestType->clear(); - if (m_test.type & (HypothesisTest::Test::Type::TTest | HypothesisTest::Test::Type::ZTest)) { - ui.cbTestType->addItem( i18n("Two Sample Independent"), HypothesisTest::Test::SubType::TwoSampleIndependent); - ui.cbTestType->addItem( i18n("Two Sample Paired"), HypothesisTest::Test::SubType::TwoSamplePaired); - ui.cbTestType->addItem( i18n("One Sample"), HypothesisTest::Test::SubType::OneSample); - } else if (m_test.type & HypothesisTest::Test::Type::Anova) { - ui.cbTestType->addItem( i18n("One Way"), HypothesisTest::Test::SubType::OneWay); - ui.cbTestType->addItem( i18n("Two Way"), HypothesisTest::Test::SubType::TwoWay); - } -} - -void HypothesisTestDock::showHypothesisTest() { - //QDEBUG("in showHypothesisTest"); - - if (ui.cbTestType->count() == 0) - return; - - m_test.subtype = HypothesisTest::Test::SubType(ui.cbTestType->currentData().toInt()); - - ui.lCol1->show(); - ui.cbCol1->show(); - - ui.lCol2->setVisible(bool(m_test.subtype & (~HypothesisTest::Test::SubType::OneSample))); - ui.cbCol2->setVisible(bool(m_test.subtype & (~HypothesisTest::Test::SubType::OneSample))); - - ui.lCol3->setVisible(bool(m_test.type & (HypothesisTest::Test::Anova) & - setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoWay))); - ui.cbCol3->setVisible(bool(m_test.type & (HypothesisTest::Test::Anova) & - setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoWay))); - - ui.lEqualVariance->setVisible(bool( (m_test.type & HypothesisTest::Test::Type::TTest) & - (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); - ui.chbEqualVariance->setVisible(bool( (m_test.type & HypothesisTest::Test::Type::TTest) & - (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); - - ui.lCategorical->setVisible(bool((m_test.type & HypothesisTest::Test::Type::TTest) & - (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); - ui.chbCategorical->setVisible(bool((m_test.type & HypothesisTest::Test::Type::TTest) & - (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); - ui.chbEqualVariance->setChecked(true); - - ui.lPopulationSigma->setVisible(bool((m_test.type & (HypothesisTest::Test::Type::TTest | - HypothesisTest::Test::Type::ZTest)) & - ~(setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneSample)))); - - ui.chbPopulationSigma->setVisible(bool((m_test.type & (HypothesisTest::Test::Type::TTest | - HypothesisTest::Test::Type::ZTest)) & - ~(setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneSample)))); ui.chbPopulationSigma->setChecked(false); - - ui.pbLeveneTest->setVisible(bool((m_test.type & HypothesisTest::Test::Type::Anova & - setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneWay)) | - (HypothesisTest::Test::Type::TTest & - setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent)))); - - ui.lH1->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH1OneTail1->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH1OneTail2->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH1TwoTail->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - - ui.lH0->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH0OneTail1->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH0OneTail2->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH0TwoTail->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - - ui.rbH1TwoTail->setChecked(true); - - ui.lMuo->setVisible(bool(m_test.subtype & HypothesisTest::Test::SubType::OneSample)); - ui.leMuo->setVisible(bool(ui.lMuo->isVisible())); - - ui.lAlpha->show(); - ui.leAlpha->show(); - - setColumnsComboBoxView(); - - ui.pbPerformTest->setEnabled(nonEmptySelectedColumns()); - ui.pbLeveneTest->setEnabled(nonEmptySelectedColumns()); -} - -void HypothesisTestDock::doHypothesisTest() { - //QDEBUG("in doHypothesisTest"); - m_hypothesisTest->setPopulationMean(ui.leMuo->text()); - m_hypothesisTest->setSignificanceLevel(ui.leAlpha->text()); - - QVector cols; - - if (ui.cbCol1->count() == 0) - return; - - cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong()); - - if (m_test.subtype & HypothesisTest::Test::SubType::TwoWay) - cols << reinterpret_cast(ui.cbCol3->currentData().toLongLong()); - - if (m_test.subtype & (~HypothesisTest::Test::SubType::OneSample)) - if (ui.cbCol2->count() > 0) - cols << reinterpret_cast(ui.cbCol2->currentData().toLongLong()); - - m_hypothesisTest->setColumns(cols); - - m_hypothesisTest->performTest(m_test, ui.chbCategorical->isChecked(), ui.chbEqualVariance->isChecked()); -} - -void HypothesisTestDock::performLeveneTest() { - QVector cols; - - if (ui.cbCol1->count() == 0 || ui.cbCol2->count() == 0) - return; - - cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong()); - cols << reinterpret_cast(ui.cbCol2->currentData().toLongLong()); - m_hypothesisTest->setColumns(cols); - - m_hypothesisTest->setSignificanceLevel(ui.leAlpha->text()); - m_hypothesisTest->performLeveneTest(ui.chbCategorical->isChecked()); -} - -void HypothesisTestDock::setModelIndexFromAspect(TreeViewComboBox* cb, const AbstractAspect* aspect) { - if (aspect) - cb->setCurrentModelIndex(m_aspectTreeModel->modelIndexOfAspect(aspect)); - else - cb->setCurrentModelIndex(QModelIndex()); -} - -///*! -// shows the database manager where the connections are created and edited. -// The selected connection is selected in the connection combo box in this widget. -//**/ -//void HypothesisTestDock::showDatabaseManager() { -// DatabaseManagerDialog* dlg = new DatabaseManagerDialog(this, ui.cbConnection->currentText()); - -// if (dlg->exec() == QDialog::Accepted) { -// //re-read the available connections to be in sync with the changes in DatabaseManager -// m_initializing = true; -// ui.cbConnection->clear(); -// readConnections(); - -// //select the connection the user has selected in DatabaseManager -// const QString& conn = dlg->connection(); -// ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(conn)); -// m_initializing = false; - -// connectionChanged(); -// } - -// delete dlg; -//} - -///*! -// loads all available saved connections -//*/ -//void HypothesisTestDock::readConnections() { -// DEBUG("ImportSQLDatabaseWidget: reading available connections"); -// KConfig config(m_configPath, KConfig::SimpleConfig); -// for (const auto& name : config.groupList()) -// ui.cbConnection->addItem(name); -//} - -///*! -// * adds the selected field to the rows -// */ -//void HypothesisTestDock::addRow() { -// QString field = ui.lwFields->currentItem()->text(); -// ui.lwRows->addItem(field); -// ui.lwFields->takeItem(ui.lwFields->currentRow()); -// m_hypothesisTest->addToRows(field); -//} - -///*! -// * removes the selected field from the rows -// */ -//void HypothesisTestDock::removeRow() { -// const QString& field = ui.lwRows->currentItem()->text(); -// ui.lwRows->takeItem(ui.lwRows->currentRow()); -// m_hypothesisTest->removeFromRows(field); -// updateFields(); -//} - -///*! -// * adds the selected field to the columns -// */ -//void HypothesisTestDock::addColumn() { -// QString field = ui.lwFields->currentItem()->text(); -// ui.lwColumns->addItem(field); -// ui.lwFields->takeItem(ui.lwFields->currentRow()); -// m_hypothesisTest->addToColumns(field); -//} - -///*! -// * removes the selected field from the columns -// */ -//void HypothesisTestDock::removeColumn() { -// const QString& field = ui.lwColumns->currentItem()->text(); -// ui.lwColumns->takeItem(ui.lwColumns->currentRow()); -// m_hypothesisTest->removeFromColumns(field); -// updateFields(); -//} - -///*! -// * re-populates the content of the "Fields" list widget by adding the non-selected fields only. -// * called when a selected field is removed from rows or columns. -// */ -//void HypothesisTestDock::updateFields() { -// ui.lwFields->clear(); -// for (auto dimension : m_hypothesisTest->dimensions()) -// if (!fieldSelected(dimension)) -// ui.lwFields->addItem(new QListWidgetItem(QIcon::fromTheme("draw-text"), dimension)); - -// for (auto measure : m_hypothesisTest->measures()) -// if (!fieldSelected(measure)) -// ui.lwFields->addItem(new QListWidgetItem(measure)); -//} - -///*! -// * return \c true if the field name \c field was selected among rows or columns, -// * return \c false otherwise. -// * */ -//bool HypothesisTestDock::fieldSelected(const QString& field) { -// for (int i = 0; icount(); ++i) -// if (ui.lwRows->item(i)->text() == field) -// return true; - -// for (int i = 0; icount(); ++i) -// if (ui.lwColumns->item(i)->text() == field) -// return true; - -// return false; -//} - -////************************************************************* -////****** SLOTs for changes triggered in HypothesisTestDock ******* -////************************************************************* -//void HypothesisTestDock::nameChanged() { -// if (m_initializing) -// return; - -// m_hypothesisTest->setName(ui.leName->text()); -//} - -//void HypothesisTestDock::commentChanged() { -// if (m_initializing) -// return; - -// m_hypothesisTest->setComment(ui.leComment->text()); -//} - -void HypothesisTestDock::dataSourceTypeChanged(int index) { - //QDEBUG("in dataSourceTypeChanged"); - HypothesisTest::DataSourceType type = static_cast(index); - bool showDatabase = (type == HypothesisTest::DataSourceType::DataSourceDatabase); - ui.lSpreadsheet->setVisible(!showDatabase); - cbSpreadsheet->setVisible(!showDatabase); - ui.lConnection->setVisible(showDatabase); - ui.cbConnection->setVisible(showDatabase); - ui.bDatabaseManager->setVisible(showDatabase); - ui.lTable->setVisible(showDatabase); - ui.cbTable->setVisible(showDatabase); - - if (m_initializing) - return; - - m_hypothesisTest->setComment(ui.leComment->text()); - -} - -void HypothesisTestDock::spreadsheetChanged(const QModelIndex& index) { - //QDEBUG("in spreadsheetChanged"); - auto* aspect = static_cast(index.internalPointer()); - Spreadsheet* spreadsheet = dynamic_cast(aspect); - setColumnsComboBoxModel(spreadsheet); - m_hypothesisTest->setDataSourceSpreadsheet(spreadsheet); -} - -void HypothesisTestDock::changeCbCol2Label() { - //QDEBUG("in changeCbCol2Label"); - if ( (m_test.type & ~HypothesisTest::Test::Type::Anova) & (m_test.subtype & ~HypothesisTest::Test::SubType::TwoSampleIndependent)) { - ui.lCol2->setText( i18n("Independent Var. 2")); - return; - } - - if (ui.cbCol1->count() == 0) return; - - QString selected_text = ui.cbCol1->currentText(); - Column* col1 = m_hypothesisTest->dataSourceSpreadsheet()->column(selected_text); - - if (!ui.chbCategorical->isChecked() && (col1->columnMode() == AbstractColumn::Integer || col1->columnMode() == AbstractColumn::Numeric)) { - ui.lCol2->setText( i18n("Independent Var. 2")); - ui.chbCategorical->setChecked(false); - ui.chbCategorical->setEnabled(true); - } else { - ui.lCol2->setText( i18n("Dependent Var. 1")); - if (!ui.chbCategorical->isChecked()) - ui.chbCategorical->setEnabled(false); - else - ui.chbCategorical->setEnabled(true); - ui.chbCategorical->setChecked(true); - } -} - -void HypothesisTestDock::chbPopulationSigmaStateChanged() { - if (ui.chbPopulationSigma->isVisible() && ui.chbPopulationSigma->isChecked()) - ui.lePopulationSigma->show(); - else - ui.lePopulationSigma->hide(); -} - -void HypothesisTestDock::col1IndexChanged(int index) { - if (index < 0) return; - changeCbCol2Label(); -} - - -//void HypothesisTestDock::connectionChanged() { -// if (ui.cbConnection->currentIndex() == -1) { -// ui.lTable->hide(); -// ui.cbTable->hide(); -// return; -// } - -// //clear the previously shown tables -// ui.cbTable->clear(); -// ui.lTable->show(); -// ui.cbTable->show(); - -// const QString& connection = ui.cbConnection->currentText(); - -// //connection name was changed, determine the current connections settings -// KConfig config(m_configPath, KConfig::SimpleConfig); -// KConfigGroup group = config.group(connection); - -// //close and remove the previos connection, if available -// if (m_db.isOpen()) { -// m_db.close(); -// QSqlDatabase::removeDatabase(m_db.driverName()); -// } - -// //open the selected connection -// //QDEBUG("HypothesisTestDock: connecting to " + connection); -// const QString& driver = group.readEntry("Driver"); -// m_db = QSqlDatabase::addDatabase(driver); - -// const QString& dbName = group.readEntry("DatabaseName"); -// if (DatabaseManagerWidget::isFileDB(driver)) { -// if (!QFile::exists(dbName)) { -// KMessageBox::error(this, i18n("Couldn't find the database file '%1'. Please check the connection settings.", dbName), -// appendRow i18n("Connection Failed")); -// return; -// } else -// m_db.setDatabaseName(dbName); -// } else if (DatabaseManagerWidget::isODBC(driver)) { -// if (group.readEntry("CustomConnectionEnabled", false)) -// m_db.setDatabaseName(group.readEntry("CustomConnectionString")); -// else -// m_db.setDatabaseName(dbName); -// } else { -// m_db.setDatabaseName(dbName); -// m_db.setHostName( group.readEntry("HostName") ); -// m_db.setPort( group.readEntry("Port", 0) ); -// m_db.setUserName( group.readEntry("UserName") ); -// m_db.setPassword( group.readEntry("Password") ); -// } - -// WAIT_CURSOR; -// if (!m_db.open()) { -// RESET_CURSOR; -// KMessageBox::error(this, i18n("Failed to connect to the database '%1'. Please check the connection settings.", ui.cbConnection->currentText()) + -// QLatin1String("\n\n") + m_db.lastError().databaseText(), -// i18n("Connection Failed")); -// return; -// } - -// //show all available database tables -// if (m_db.tables().size()) { -// for (auto table : m_db.tables()) -// ui.cbTable->addItem(QIcon::fromTheme("view-form-table"), table); -// ui.cbTable->setCurrentIndex(0); -// } - -// RESET_CURSOR; - -// if (m_initializing) -// return; - -//// m_hypothesisTest->setDataSourceConnection(connection); -//} - -//void HypothesisTestDock::tableChanged() { -// const QString& table = ui.cbTable->currentText(); - -// //show all attributes of the selected table -//// for (const auto* col : spreadsheet->children()) { -//// QListWidgetItem* item = new QListWidgetItem(col->icon(), col->name()); -//// ui.lwFields->addItem(item); -//// } - -// if (m_initializing) -// return; - -//// m_hypothesisTest->setDataSourceTable(table); -//} - -////************************************************************* -////******** SLOTs for changes triggered in Spreadsheet ********* -////************************************************************* -void HypothesisTestDock::hypothesisTestDescriptionChanged(const AbstractAspect* aspect) { - //QDEBUG("in hypothesisTestDescriptionChanged"); - - if (m_hypothesisTest != aspect) - return; - - m_initializing = true; - if (aspect->name() != ui.leName->text()) - ui.leName->setText(aspect->name()); - else if (aspect->comment() != ui.leComment->text()) - ui.leComment->setText(aspect->comment()); - - m_initializing = false; -} - -////************************************************************* -////******************** SETTINGS ******************************* -////************************************************************* -//void HypothesisTestDock::load() { - -//} - -//void HypothesisTestDock::loadConfigFromTemplate(KConfig& config) { -// Q_UNUSED(config); -//} - -///*! -// loads saved matrix properties from \c config. -// */ -//void HypothesisTestDock::loadConfig(KConfig& config) { -// Q_UNUSED(config); -//} - -///*! -// saves matrix properties to \c config. -// */ -//void HypothesisTestDock::saveConfigAsTemplate(KConfig& config) { -// Q_UNUSED(config); -//} - -//TODO: Rather than inbuilt slots use own decided slots for checked rather than clicked - -// for alternate hypothesis -// one_tail_1 is mu > mu0; one_tail_2 is mu < mu0; two_tail = mu != mu0; -void HypothesisTestDock::onRbH1OneTail1Toggled(bool checked) { - if (!checked) return; - ui.rbH0OneTail1->setChecked(true); - m_test.tail = HypothesisTest::Test::Tail::Positive; -} - -void HypothesisTestDock::onRbH1OneTail2Toggled(bool checked) { - if (!checked) return; - ui.rbH0OneTail2->setChecked(true); - m_test.tail = HypothesisTest::Test::Tail::Negative; - -} - -void HypothesisTestDock::onRbH1TwoTailToggled(bool checked) { - if (!checked) return; - ui.rbH0TwoTail->setChecked(true); - m_test.tail = HypothesisTest::Test::Tail::Two; -} - - -/**************************************Helper Functions********************************************/ -void HypothesisTestDock::countPartitions(Column *column, int &np, int &total_rows) { - total_rows = column->rowCount(); - np = 0; - QString cell_value; - QMap discovered_categorical_var; - - AbstractColumn::ColumnMode original_col_mode = column->columnMode(); - column->setColumnMode(AbstractColumn::Text); - - for (int i = 0; i < total_rows; i++) { - cell_value = column->textAt(i); - - if (cell_value.isEmpty()) { - total_rows = i; - break; - } - - if (discovered_categorical_var[cell_value]) - continue; - - discovered_categorical_var[cell_value] = true; - np++; - } - column->setColumnMode(original_col_mode); -} - -void HypothesisTestDock::setColumnsComboBoxModel(Spreadsheet* spreadsheet) { - m_onlyValuesCols.clear(); - m_twoCategoricalCols.clear(); - m_multiCategoricalCols.clear(); - - for (auto* col : spreadsheet->children()) { - if (col->columnMode() == AbstractColumn::Integer || col->columnMode() == AbstractColumn::Numeric) - m_onlyValuesCols.append(col); - else { - int np = 0, n_rows = 0; - countPartitions(col, np, n_rows); - if (np <= 1) - continue; - else if (np == 2) - m_twoCategoricalCols.append(col); - else - m_multiCategoricalCols.append(col); - } - } - setColumnsComboBoxView(); - showHypothesisTest(); -} - - -//TODO: change from if else to switch case: -void HypothesisTestDock::setColumnsComboBoxView() { - - ui.cbCol1->clear(); - ui.cbCol2->clear(); - ui.cbCol3->clear(); - - QList::iterator i; - - switch (m_test.type) { - case (HypothesisTest::Test::Type::ZTest): - case (HypothesisTest::Test::Type::TTest): { - switch (m_test.subtype) { - case (HypothesisTest::Test::SubType::TwoSampleIndependent): { - for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - ui.cbCol2->addItem( (*i)->name(), qint64(*i)); - } - for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - break; - } - case (HypothesisTest::Test::SubType::TwoSamplePaired): { - for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - ui.cbCol2->addItem( (*i)->name(), qint64(*i)); - } - break; - } - case (HypothesisTest::Test::SubType::OneSample): { - for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - break; - } - case HypothesisTest::Test::SubType::OneWay: - case HypothesisTest::Test::SubType::TwoWay: - case HypothesisTest::Test::SubType::NoneSubType: - break; - } - break; - } - case HypothesisTest::Test::Type::Anova: { - switch (m_test.subtype) { - case HypothesisTest::Test::SubType::OneWay: { - for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) - ui.cbCol2->addItem( (*i)->name(), qint64(*i)); - for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - for (i = m_multiCategoricalCols.begin(); i != m_multiCategoricalCols.end(); i++) - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - break; - } - case HypothesisTest::Test::SubType::TwoWay: { - for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) - ui.cbCol2->addItem( (*i)->name(), qint64(*i)); - - for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) { - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - ui.cbCol3->addItem( (*i)->name(), qint64(*i)); - } - for (i = m_multiCategoricalCols.begin(); i != m_multiCategoricalCols.end(); i++) { - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - ui.cbCol3->addItem( (*i)->name(), qint64(*i)); - } - break; - } - case HypothesisTest::Test::SubType::TwoSampleIndependent: - case HypothesisTest::Test::SubType::TwoSamplePaired: - case HypothesisTest::Test::SubType::OneSample: - case HypothesisTest::Test::SubType::NoneSubType: - break; - } - break; - } - case HypothesisTest::Test::Type::NoneType: - break; - } -} - -bool HypothesisTestDock::nonEmptySelectedColumns() { - if (ui.cbCol1->isVisible() && ui.cbCol1->count() < 1) - return false; - if (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1) - return false; - return true; -} - -uint8_t HypothesisTestDock::setAllBits(const uint8_t& bits) { - if (!bits) - return 0; - return ~(bits & (bits-1)); -} diff --git a/tests/stats/CMakeLists.txt b/tests/stats/CMakeLists.txt index 07faef219..54d2f5537 100644 --- a/tests/stats/CMakeLists.txt +++ b/tests/stats/CMakeLists.txt @@ -1 +1,2 @@ -add_subdirectory(ttest) \ No newline at end of file +add_subdirectory(ttest) +add_subdirectory(anova) diff --git a/tests/stats/anova/AnovaTest.cpp b/tests/stats/anova/AnovaTest.cpp new file mode 100644 index 000000000..c393caa1c --- /dev/null +++ b/tests/stats/anova/AnovaTest.cpp @@ -0,0 +1,183 @@ +/*************************************************************************** + File : AnovaTest.cpp + Project : LabPlot + Description : Tests for data correlation + -------------------------------------------------------------------- + Copyright : (C) 2019 Devanshu Agarwal (agarwaldevanshu8@gmail.com) + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the Free Software * + * Foundation, Inc., 51 Franklin Street, Fifth Floor, * + * Boston, MA 02110-1301 USA * + * * + ***************************************************************************/ + +#include "AnovaTest.h" +#include "backend/hypothesisTest/HypothesisTest.h" + +#include "backend/core/AbstractColumn.h" +#include "backend/core/column/Column.h" + +void AnovaTest::oneWayAnova_data() { + QTest::addColumn>("col1Data"); + QTest::addColumn>("col2Data"); + QTest::addColumn("fValue_expected"); + QTest::addColumn("pValue_expected"); + + // First Sample + QVector col1Data = {"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", + "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", + "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", + "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", + "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5"}; + + QVector col2Data = {1, 43, 15, 40, 8, 17, 30, 34, 34, 26, 1, 7, 22, 30, 40, 15, 20, 9, 14, 15, + 6, 30, 15, 30, 12, 17, 21, 23, 20, 27, -19, -18, -8, -6, -6, -9, -17, -12, -11, -6, + 5, 8, 12, 19, 8, 15, 21, 28, 26, 27, -10, 6, 4, 3, 0, 4, 9, -5, 7, 13, + 38, 20, 20, 28, 11, 17, 15, 27, 24, 23, 28, 26, 34, 32, 24, 29, 30, 24, 34, 23, + -5, -12, -15, -4, -2, -6, -2, -7, -10, -15, -13, -16, -23, -22, -9, -18, -17, -15, -14, -15}; + double fValue_expected = 33.1289; + double pValue_expected = 0; + + QTest::newRow("socscistatistics") << col1Data << col2Data << fValue_expected << pValue_expected; +} + +void AnovaTest::oneWayAnova() { + QFETCH(QVector, col1Data); + QFETCH(QVector, col2Data); + QFETCH(double, fValue_expected); + QFETCH(double, pValue_expected); + + Column* col1 = new Column("col1", AbstractColumn::Text); + Column* col2 = new Column("col2", AbstractColumn::Numeric); + + col1->replaceTexts(0, col1Data); + col2->replaceValues(0, col2Data); + + QVector cols; + cols << col1 << col2; + + HypothesisTest anovaTest("One Way Anova"); + anovaTest.setColumns(cols); + + HypothesisTest::Test test; + test.type = HypothesisTest::Test::Type::Anova; + test.subtype = HypothesisTest::Test::SubType::OneWay; + test.tail = HypothesisTest::Test::Tail::Two; + + bool categoricalVariable = true; + bool equalVariance = true; + + anovaTest.performTest(test, categoricalVariable, equalVariance); + + double fValue = anovaTest.statisticValue()[0]; + double pValue = anovaTest.pValue()[0]; + + QDEBUG("fValue is " << fValue); + QDEBUG("pValue is: " << pValue); + QDEBUG("fValue_expected is " << fValue_expected); + QDEBUG("pValue_expected is: " << pValue_expected); + + FuzzyCompare(fValue, fValue_expected, 0.1); + FuzzyCompare(pValue, pValue_expected, 0.1); +} + +void AnovaTest::twoWayAnova_data() { + QTest::addColumn>("col1Data"); + QTest::addColumn>("col2Data"); + QTest::addColumn>("col3Data"); + QTest::addColumn("fCol1Value_expected"); + QTest::addColumn("fCol2Value_expected"); + QTest::addColumn("fInteractionValue_expected"); + QTest::addColumn("pCol1Value_expected"); + QTest::addColumn("pCol2Value_expected"); + + // First Sample + // This data set is taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf + QVector col1Data = {"Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best"}; + QVector col2Data = {"cold", "cold", "cold", "cold", "warm", "warm", "warm", "warm", "hot", "hot", "hot", "hot", "cold", "cold", "cold", "cold", "warm", "warm", "warm", "warm", "hot", "hot", "hot", "hot"}; + QVector col3Data = {4, 5, 6, 5, 7, 9, 8, 12, 10, 12, 11, 9, 6, 6, 4, 4, 13, 15, 12, 12, 12, 13, 10, 13}; + double fCol1Value_expected = 9.8108; + double fCol2Value_expected = 48.7297; + double fInteractionValue_expected = 3.9730; + double pCol1Value_expected = 0.005758; + double pCol2Value_expected = 5.44e-08; +// double pInteractionValue_expected = 0.037224; + + QTest::newRow("detergent vs temperature") << col1Data << col2Data << col3Data << + fCol1Value_expected << fCol2Value_expected << fInteractionValue_expected << + pCol1Value_expected << pCol2Value_expected; +} + +//TODO: check for pValue. In document probabilty is Pr(>F) +void AnovaTest::twoWayAnova() { + QFETCH(QVector, col1Data); + QFETCH(QVector, col2Data); + QFETCH(QVector, col3Data); + QFETCH(double, fCol1Value_expected); + QFETCH(double, fCol2Value_expected); + QFETCH(double, fInteractionValue_expected); + QFETCH(double, pCol1Value_expected); + QFETCH(double, pCol2Value_expected); + + Column* col1 = new Column("col1", AbstractColumn::Text); + Column* col2 = new Column("col2", AbstractColumn::Text); + Column* col3 = new Column("col3", AbstractColumn::Numeric); + + col1->replaceTexts(0, col1Data); + col2->replaceTexts(0, col2Data); + col3->replaceValues(0, col3Data); + + QVector cols; + cols << col1 << col2 << col3; + + HypothesisTest anovaTest("Two Way Anova"); + anovaTest.setColumns(cols); + + HypothesisTest::Test test; + test.type = HypothesisTest::Test::Type::Anova; + test.subtype = HypothesisTest::Test::SubType::TwoWay; + test.tail = HypothesisTest::Test::Tail::Two; + + anovaTest.performTest(test); + double fCol1Value = anovaTest.statisticValue()[0]; + double fCol2Value = anovaTest.statisticValue()[1]; + double fInteractionValue = anovaTest.statisticValue()[2]; + + double pCol1Value = anovaTest.pValue()[0]; + double pCol2Value = anovaTest.pValue()[1]; + + QDEBUG("size of statistic value is " << anovaTest.statisticValue().size()); + QDEBUG("fCol1Value is " << fCol1Value); + QDEBUG("fCol1Value_expected is " << fCol1Value_expected); + QDEBUG("fCol2Value is " << fCol2Value); + QDEBUG("fCol2Value_expected is " << fCol2Value_expected); + QDEBUG("fInteractionValue is " << fInteractionValue); + QDEBUG("fInteractionValue_expected is " << fInteractionValue_expected); + + QDEBUG("pCol1Value is " << pCol1Value); + QDEBUG("pCol1Value_expected is " << pCol1Value_expected); + QDEBUG("pCol2Value is " << pCol2Value); + QDEBUG("pCol2Value_expected is " << pCol2Value_expected); + + FuzzyCompare(fCol1Value, fCol1Value_expected, 0.1); + FuzzyCompare(fCol2Value, fCol2Value_expected, 0.1); + FuzzyCompare(fInteractionValue, fInteractionValue_expected, 0.1); + FuzzyCompare(pCol1Value, pCol1Value_expected, 0.1); + FuzzyCompare(pCol2Value, pCol2Value_expected, 0.1); +} + +QTEST_MAIN(AnovaTest) diff --git a/tests/stats/anova/AnovaTest.h b/tests/stats/anova/AnovaTest.h new file mode 100644 index 000000000..df285311e --- /dev/null +++ b/tests/stats/anova/AnovaTest.h @@ -0,0 +1,42 @@ +/*************************************************************************** + File : AnovaTest.h + Project : LabPlot + Description : Tests for data correlation + -------------------------------------------------------------------- + Copyright : (C) 2019 Devanshu Agarwal (agarwaldevanshu8@gmail.com) + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the Free Software * + * Foundation, Inc., 51 Franklin Street, Fifth Floor, * + * Boston, MA 02110-1301 USA * + * * + ***************************************************************************/ +#ifndef ANOVATEST_H +#define ANOVATEST_H + +#include <../../CommonTest.h> + +class AnovaTest : public CommonTest { + Q_OBJECT + +private slots: + void oneWayAnova_data(); + void oneWayAnova(); + + void twoWayAnova_data(); + void twoWayAnova(); +}; +#endif diff --git a/tests/stats/anova/CMakeLists.txt b/tests/stats/anova/CMakeLists.txt new file mode 100644 index 000000000..b76e61b18 --- /dev/null +++ b/tests/stats/anova/CMakeLists.txt @@ -0,0 +1,7 @@ +add_executable (anova AnovaTest.cpp ../../CommonTest.cpp) + +target_link_libraries(anova Qt5::Test) +target_link_libraries(anova KF5::Archive KF5::XmlGui) +target_link_libraries(anova labplot2lib) + +add_test(NAME anova COMMAND anova) diff --git a/tests/stats/ttest/TTestTest.cpp b/tests/stats/ttest/TTestTest.cpp index dda774d50..ff6455e22 100644 --- a/tests/stats/ttest/TTestTest.cpp +++ b/tests/stats/ttest/TTestTest.cpp @@ -1,200 +1,200 @@ /*************************************************************************** File : CorrelationTest.cpp Project : LabPlot Description : Tests for data correlation -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal (agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "TTestTest.h" #include "backend/hypothesisTest/HypothesisTest.h" #include "backend/core/AbstractColumn.h" #include "backend/core/column/Column.h" void TTestTest::twoSampleIndependent_data() { QTest::addColumn>("col1Data"); QTest::addColumn>("col2Data"); QTest::addColumn("tValue_expected"); QTest::addColumn("pValue_expected"); // First Sample // This data set is taken from "JASP" QVector col1Data = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; QVector col2Data = {3, 1, 5, 4, 6, 4, 6, 2, 0, 5, 4, 5, 4, 3, 6, 6, 8, 5, 5, 4, 2, 5, 7, 5}; double tValue_expected = -1.713; double pValue_expected = 0.101; QTest::newRow("invisible cloak") << col1Data << col2Data << tValue_expected << pValue_expected; // Second Sample // This data set is taken from "JASP" col1Data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; col2Data = {42, 46, 43, 10, 55, 17, 26, 60, 62, 53, 37, 42, 33, 37, 41, 42, 19, 55, 54, 28, 20, 48, 85, 24, 56, 43, 59, 58, 52, 71, 62, 43, 54, 49, 57, 61, 33, 44, 46, 67, 43, 49, 57, 53}; tValue_expected = -2.26; pValue_expected = 0.028; QTest::newRow("directed control activities") << col1Data << col2Data << tValue_expected << pValue_expected; } void TTestTest::twoSampleIndependent() { QFETCH(QVector, col1Data); QFETCH(QVector, col2Data); QFETCH(double, tValue_expected); QFETCH(double, pValue_expected); Column* col1 = new Column("col1", AbstractColumn::Numeric); Column* col2 = new Column("col2", AbstractColumn::Numeric); col1->replaceValues(0, col1Data); col2->replaceValues(0, col2Data); QVector cols; cols << col1 << col2; HypothesisTest tTest("Two Sample Independent"); tTest.setColumns(cols); HypothesisTest::Test test; test.type = HypothesisTest::Test::Type::TTest; test.subtype = HypothesisTest::Test::SubType::TwoSampleIndependent; test.tail = HypothesisTest::Test::Tail::Two; bool categoricalVariable = true; bool equalVariance = true; tTest.performTest(test, categoricalVariable, equalVariance); - double tValue = tTest.statisticValue(); - double pValue = tTest.pValue(); + double tValue = tTest.statisticValue()[0]; + double pValue = tTest.pValue()[0]; qDebug() << "tValue is " << tValue; qDebug() << "pValue is: " << pValue; qDebug() << "tValue_expected is " << tValue_expected; qDebug() << "pValue_expected is: " << pValue_expected; FuzzyCompare(tValue, tValue_expected, (0.01) / abs(tValue)); FuzzyCompare(pValue, pValue_expected, (0.01) / abs(pValue)); } void TTestTest::twoSamplePaired_data() { QTest::addColumn>("col1Data"); QTest::addColumn>("col2Data"); QTest::addColumn("tValue_expected"); QTest::addColumn("pValue_expected"); // First Sample // This data set is taken from "JASP" // DATA SET:: Moon and Aggression QVector col1Data = {3.33, 3.67, 2.67, 3.33, 3.33, 3.67, 4.67, 2.67, 6, 4.33, 3.33, 0.67, 1.33, 0.33, 2}; QVector col2Data = {0.27, 0.59, 0.32, 0.19, 1.26, 0.11, 0.3, 0.4, 1.59, 0.6, 0.65, 0.69, 1.26, 0.23, 0.38}; double tValue_expected = 6.452; double pValue_expected = 0.001; QTest::newRow("Moon and Aggression") << col1Data << col2Data << tValue_expected << pValue_expected; } void TTestTest::twoSamplePaired() { QFETCH(QVector, col1Data); QFETCH(QVector, col2Data); QFETCH(double, tValue_expected); QFETCH(double, pValue_expected); Column* col1 = new Column("col1", AbstractColumn::Numeric); Column* col2 = new Column("col2", AbstractColumn::Numeric); col1->replaceValues(0, col1Data); col2->replaceValues(0, col2Data); QVector cols; cols << col1 << col2; HypothesisTest tTest("Two Sample Paried"); tTest.setColumns(cols); HypothesisTest::Test test; test.type = HypothesisTest::Test::Type::TTest; test.subtype = HypothesisTest::Test::SubType::TwoSamplePaired; test.tail = HypothesisTest::Test::Tail::Two; tTest.performTest(test); - double tValue = tTest.statisticValue(); - double pValue = tTest.pValue(); + double tValue = tTest.statisticValue()[0]; + double pValue = tTest.pValue()[0]; qDebug() << "tValue is " << tValue; qDebug() << "pValue is: " << pValue; qDebug() << "tValue_expected is " << tValue_expected; qDebug() << "pValue_expected is: " << pValue_expected; FuzzyCompare(tValue, tValue_expected, (0.01) / abs(tValue)); FuzzyCompare(pValue, pValue_expected, (0.01) / abs(pValue)); } void TTestTest::oneSample_data() { QTest::addColumn>("col1Data"); QTest::addColumn("populationMean"); QTest::addColumn("tValue_expected"); QTest::addColumn("pValue_expected"); // First Sample // This data set is taken from "JASP" // DATA SET:: Weight Gain; QVector col1Data = {13.2, 8.58, 14.08, 8.58, 10.56, 14.74, 7.92, 13.2, 12.76, 5.72, 11.66, 7.04, 3.08, 15.62, 14.3, 5.5}; double populationMean = 16; double tValue_expected = -5.823; double pValue_expected = 0.001; QTest::newRow("weight gain") << col1Data << populationMean << tValue_expected << pValue_expected; } void TTestTest::oneSample() { QFETCH(QVector, col1Data); QFETCH(double, populationMean); QFETCH(double, tValue_expected); QFETCH(double, pValue_expected); Column* col1 = new Column("col1", AbstractColumn::Numeric); col1->replaceValues(0, col1Data); QVector cols; cols << col1; HypothesisTest tTest("One Sample"); tTest.setColumns(cols); tTest.setPopulationMean(populationMean); HypothesisTest::Test test; test.type = HypothesisTest::Test::Type::TTest; test.subtype = HypothesisTest::Test::SubType::OneSample; test.tail = HypothesisTest::Test::Tail::Two; tTest.performTest(test); - double tValue = tTest.statisticValue(); - double pValue = tTest.pValue(); + double tValue = tTest.statisticValue()[0]; + double pValue = tTest.pValue()[0]; qDebug() << "tValue is " << tValue; qDebug() << "pValue is: " << pValue; qDebug() << "tValue_expected is " << tValue_expected; qDebug() << "pValue_expected is: " << pValue_expected; FuzzyCompare(tValue, tValue_expected, (0.01) / fabs(tValue)); FuzzyCompare(pValue, pValue_expected, (0.01) / fabs(pValue)); } QTEST_MAIN(TTestTest)