diff --git a/src/backend/generalTest/CorrelationCoefficient.cpp b/src/backend/generalTest/CorrelationCoefficient.cpp index ef5cd3c55..3834fb127 100644 --- a/src/backend/generalTest/CorrelationCoefficient.cpp +++ b/src/backend/generalTest/CorrelationCoefficient.cpp @@ -1,426 +1,418 @@ /*************************************************************************** File : CorrelationCoefficient.cpp Project : LabPlot Description : Finding Correlation Coefficient on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "CorrelationCoefficient.h" #include "GeneralTest.h" #include "kdefrontend/generalTest/CorrelationCoefficientView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" #include #include #include #include #include #include #include #include #include #include #include #include extern "C" { #include "backend/nsl/nsl_stats.h" } CorrelationCoefficient::CorrelationCoefficient(const QString& name) : GeneralTest(name, AspectType::CorrelationCoefficient) { } CorrelationCoefficient::~CorrelationCoefficient() { } void CorrelationCoefficient::performTest(int test, bool categoricalVariable) { m_statsTable = ""; m_correlationValue = 0; m_statisticValue.clear(); m_pValue.clear(); for (int i = 0; i < RESULTLINESCOUNT; i++) m_resultLine[i]->clear(); switch (testType(test)) { case CorrelationCoefficient::Pearson: { m_currTestName = "

" + i18n("Pearson's r Correlation Test") + "

"; performPearson(categoricalVariable); break; } case CorrelationCoefficient::Kendall: m_currTestName = "

" + i18n("Kendall's Rank Correlation Test") + "

"; performKendall(); break; case CorrelationCoefficient::Spearman: { m_currTestName = "

" + i18n("Spearman Correlation Coefficient Test") + "

"; performSpearman(); break; } case CorrelationCoefficient::ChiSquare: switch (testSubtype(test)) { case CorrelationCoefficient::IndependenceTest: break; } break; } emit changed(); } double CorrelationCoefficient::correlationValue() const { return m_correlationValue; } QList CorrelationCoefficient::statisticValue() const { return m_statisticValue; } QList CorrelationCoefficient::pValue() const { return m_pValue; } /*************************************************************************************************************************** * Private Implementations * ************************************************************************************************************************/ /*********************************************Pearson r ******************************************************************/ //Formulaes are taken from https://www.statisticssolutions.com/correlation-pearson-kendall-spearman/ // variables: // N = total number of observations // sumColx = sum of values in colx // sumSqColx = sum of square of values in colx // sumColxColy = sum of product of values in colx and coly //TODO: support for col1 is categorical. 
//TODO: add tooltip for correlation value result //TODO: find p value void CorrelationCoefficient::performPearson(bool categoricalVariable) { if (m_columns.count() != 2) { printError("Select only 2 columns "); return; } if (categoricalVariable) { printLine(1, "currently categorical variable not supported", "blue"); return; } QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); if (!m_columns[1]->isNumeric()) printError("Column " + col2Name + " should contain only numeric or interger values"); int N = findCount(m_columns[0]); if (N != findCount(m_columns[1])) { printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal"); return; } double sumCol1 = findSum(m_columns[0], N); double sumCol2 = findSum(m_columns[1], N); double sumSqCol1 = findSumSq(m_columns[0], N); double sumSqCol2 = findSumSq(m_columns[1], N); double sumCol12 = 0; for (int i = 0; i < N; i++) sumCol12 += m_columns[0]->valueAt(i) * m_columns[1]->valueAt(i); // printing table; // cell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader; QList rowMajor; int level = 0; // horizontal header QString sigma = UTF8_QSTRING("Σ"); rowMajor.append(new Cell("", level, true)); rowMajor.append(new Cell("N", level, true, "Total Number of Observations")); rowMajor.append(new Cell(QString(sigma + "Scores"), level, true, "Sum of Scores in each column")); rowMajor.append(new Cell(QString(sigma + "Scores2"), level, true, "Sum of Squares of scores in each column")); rowMajor.append(new Cell(QString(sigma + "(" + UTF8_QSTRING("∏") + "Scores)"), level, true, "Sum of product of scores of both columns")); //data with vertical header. 
level++; rowMajor.append(new Cell(col1Name, level, true)); rowMajor.append(new Cell(N, level)); rowMajor.append(new Cell(sumCol1, level)); rowMajor.append(new Cell(sumSqCol1, level)); rowMajor.append(new Cell(sumCol12, level, false, "", 2, 1)); level++; rowMajor.append(new Cell(col2Name, level, true)); rowMajor.append(new Cell(N, level)); rowMajor.append(new Cell(sumCol2, level)); rowMajor.append(new Cell(sumSqCol2, level)); m_statsTable += getHtmlTable3(rowMajor); m_correlationValue = (N * sumCol12 - sumCol1*sumCol2) / sqrt((N * sumSqCol1 - gsl_pow_2(sumCol1)) * (N * sumSqCol2 - gsl_pow_2(sumCol2))); printLine(0, QString("Correlation Value is %1").arg(round(m_correlationValue)), "green"); } /***********************************************Kendall ******************************************************************/ // used knight algorithm for fast performance O(nlogn) rather than O(n^2) // http://adereth.github.io/blog/2013/10/30/efficiently-computing-kendalls-tau/ // TODO: Change date format type to original for numeric type; // TODO: add tooltips. // TODO: Compute tauB for ties. // TODO: find P Value from Z Value void CorrelationCoefficient::performKendall() { if (m_columns.count() != 2) { printError("Select only 2 columns "); return; } QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); int N = findCount(m_columns[0]); if (N != findCount(m_columns[1])) { printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal"); return; } QVector col2Ranks(N); if (m_columns[0]->isNumeric()) { if (m_columns[0]->isNumeric() && m_columns[1]->isNumeric()) { for (int i = 0; i < N; i++) col2Ranks[int(m_columns[0]->valueAt(i)) - 1] = int(m_columns[1]->valueAt(i)); } else { printError(QString("Ranking System should be same for both Column: %1 and Column: %2
" "Hint: Check for data types of columns").arg(col1Name).arg(col2Name)); return; } } else { AbstractColumn::ColumnMode origCol1Mode = m_columns[0]->columnMode(); AbstractColumn::ColumnMode origCol2Mode = m_columns[1]->columnMode(); m_columns[0]->setColumnMode(AbstractColumn::Text); m_columns[1]->setColumnMode(AbstractColumn::Text); QMap ValueToRank; for (int i = 0; i < N; i++) { if (ValueToRank[m_columns[0]->textAt(i)] != 0) { printError("Currently ties are not supported"); m_columns[0]->setColumnMode(origCol1Mode); m_columns[1]->setColumnMode(origCol2Mode); return; } ValueToRank[m_columns[0]->textAt(i)] = i + 1; } for (int i = 0; i < N; i++) col2Ranks[i] = ValueToRank[m_columns[1]->textAt(i)]; m_columns[0]->setColumnMode(origCol1Mode); m_columns[1]->setColumnMode(origCol2Mode); } int nPossiblePairs = (N * (N - 1)) / 2; int nDiscordant = findDiscordants(col2Ranks.data(), 0, N - 1); int nCorcordant = nPossiblePairs - nDiscordant; m_correlationValue = double(nCorcordant - nDiscordant) / nPossiblePairs; m_statisticValue.append((3 * (nCorcordant - nDiscordant)) / sqrt(N * (N- 1) * (2 * N + 5) / 2)); printLine(0, QString("Number of Discordants are %1").arg(nDiscordant), "green"); printLine(1, QString("Number of Concordant are %1").arg(nCorcordant), "green"); printLine(2, QString("Tau a is %1").arg(round(m_correlationValue)), "green"); printLine(3, QString("Z Value is %1").arg(round(m_statisticValue[0])), "green"); return; } /***********************************************Spearman ******************************************************************/ // All formulaes and symbols are taken from : https://www.statisticshowto.datasciencecentral.com/spearman-rank-correlation-definition-calculate/ void CorrelationCoefficient::performSpearman() { if (m_columns.count() != 2) { printError("Select only 2 columns "); return; } QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); int N = findCount(m_columns[0]); if (N != findCount(m_columns[1])) { 
printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal"); return; } QMap col1Ranks; convertToRanks(m_columns[0], N, col1Ranks); QMap col2Ranks; convertToRanks(m_columns[1], N, col2Ranks); double ranksCol1Mean = 0; double ranksCol2Mean = 0; for (int i = 0; i < N; i++) { ranksCol1Mean += col1Ranks[int(m_columns[0]->valueAt(i))]; ranksCol2Mean += col2Ranks[int(m_columns[1]->valueAt(i))]; } ranksCol1Mean = ranksCol1Mean / N; ranksCol2Mean = ranksCol2Mean / N; double s12 = 0; double s1 = 0; double s2 = 0; for (int i = 0; i < N; i++) { double centeredRank_1 = col1Ranks[int(m_columns[0]->valueAt(i))] - ranksCol1Mean; double centeredRank_2 = col2Ranks[int(m_columns[1]->valueAt(i))] - ranksCol2Mean; s12 += centeredRank_1 * centeredRank_2; s1 += gsl_pow_2(centeredRank_1); s2 += gsl_pow_2(centeredRank_2); } s12 = s12 / N; s1 = s1 / N; s2 = s2 / N; m_correlationValue = s12 / std::sqrt(s1 * s2); printLine(0, QString("Spearman Rank Correlation value is %1").arg(m_correlationValue), "green"); } /***********************************************Chi Square Test for Indpendence******************************************************************/ void CorrelationCoefficient::chiSquareIndpendence() { } /***********************************************Helper Functions******************************************************************/ -int CorrelationCoefficient::testType(int test) { - return test & 0x0F; -} - -int CorrelationCoefficient::testSubtype(int test) { - return test & 0xF0; -} - int CorrelationCoefficient::findDiscordants(int *ranks, int start, int end) { if (start >= end) return 0; int mid = (start + end) / 2; int leftDiscordants = findDiscordants(ranks, start, mid); int rightDiscordants = findDiscordants(ranks, mid + 1, end); int len = end - start + 1; int leftLen = mid - start + 1; int rightLen = end - mid; int leftLenRemain = leftLen; QVector leftRanks(leftLen); QVector rightRanks(rightLen); for (int i = 0; i < leftLen; i++) 
leftRanks[i] = ranks[start + i]; for (int i = leftLen; i < leftLen + rightLen; i++) rightRanks[i - leftLen] = ranks[start + i]; int mergeDiscordants = 0; int i = 0, j = 0, k =0; while (i < len) { if (j >= leftLen) { ranks[start + i] = rightRanks[k]; k++; } else if (k >= rightLen) { ranks[start + i] = leftRanks[j]; j++; } else if (leftRanks[j] < rightRanks[k]) { ranks[start + i] = leftRanks[j]; j++; leftLenRemain--; } else if (leftRanks[j] > rightRanks[k]) { ranks[start + i] = rightRanks[k]; mergeDiscordants += leftLenRemain; k++; } i++; } return leftDiscordants + rightDiscordants + mergeDiscordants; } void CorrelationCoefficient::convertToRanks(const Column* col, int N, QMap &ranks) { if (col->isNumeric()) return; double* sortedList = new double[N]; for (int i = 0; i < N; i++) sortedList[i] = col->valueAt(i); std::sort(sortedList, sortedList + N, std::greater()); ranks.clear(); for (int i = 0; i < N; i++) ranks[sortedList[i]] = i + 1; delete[] sortedList; } void CorrelationCoefficient::convertToRanks(const Column* col, QMap &ranks) { convertToRanks(col, findCount(col), ranks); } /***********************************************Virtual Functions******************************************************************/ QWidget* CorrelationCoefficient::view() const { if (!m_partView) { m_view = new CorrelationCoefficientView(const_cast(this)); m_partView = m_view; } return m_partView; } diff --git a/src/backend/generalTest/CorrelationCoefficient.h b/src/backend/generalTest/CorrelationCoefficient.h index ed9d8427a..5bfac5fa0 100644 --- a/src/backend/generalTest/CorrelationCoefficient.h +++ b/src/backend/generalTest/CorrelationCoefficient.h @@ -1,81 +1,77 @@ /*************************************************************************** File : CorrelationCoefficient.h Project : LabPlot Description : Finding Correlation Coefficient on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) 
***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef CORRELATIONCOEFFICIENT_H #define CORRELATIONCOEFFICIENT_H #include "GeneralTest.h" class CorrelationCoefficientView; class CorrelationCoefficient : public GeneralTest { Q_OBJECT public: explicit CorrelationCoefficient(const QString& name); ~CorrelationCoefficient() override; enum Test{ //Type Pearson = 0x01, Spearman = 0x02, Kendall = 0x04, ChiSquare = 0x08, //Subtype IndependenceTest = 0x10 }; double correlationValue() const; QList statisticValue() const; QList pValue() const; QWidget* view() const override; - void performTest(int m_test, bool categoricalVariable = false); + void performTest(int test, bool categoricalVariable = false); private: void performPearson(bool categoricalVariable); void performKendall(); void performSpearman(); void chiSquareIndpendence(); - - int testType(int test); - int testSubtype(int test); - int findDiscordants(int* ranks, int start, int end); void convertToRanks(const Column* col, int N, QMap &ranks); void convertToRanks(const Column* col, QMap &ranks); double 
m_correlationValue; QList m_statisticValue; QList m_pValue; }; #endif // CORRELATIONCOEFFICIENT_H diff --git a/src/backend/generalTest/GeneralTest.cpp b/src/backend/generalTest/GeneralTest.cpp index a867626b4..07ac2cf04 100644 --- a/src/backend/generalTest/GeneralTest.cpp +++ b/src/backend/generalTest/GeneralTest.cpp @@ -1,567 +1,574 @@ /*************************************************************************** File : GeneralTest.cpp Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "GeneralTest.h" #include "kdefrontend/generalTest/HypothesisTestView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" //#include //#include //#include #include #include //#include //#include //#include #include #include #include extern "C" { #include "backend/nsl/nsl_stats.h" } GeneralTest::GeneralTest(const QString& name, const AspectType& type) : AbstractPart(name, type), m_summaryLayout(new QVBoxLayout()) { m_currTestName = i18n("Result Table"); for (int i = 0; i < RESULTLINESCOUNT; i++) { m_resultLine[i] = new QLabel(); m_summaryLayout->addWidget(m_resultLine[i]); } } GeneralTest::~GeneralTest() { } void GeneralTest::setDataSourceType(DataSourceType type) { if (type != m_dataSourceType) m_dataSourceType = type; } GeneralTest::DataSourceType GeneralTest::dataSourceType() const { return m_dataSourceType; } void GeneralTest::setDataSourceSpreadsheet(Spreadsheet* spreadsheet) { m_dataSourceSpreadsheet = spreadsheet; for (auto* col : m_dataSourceSpreadsheet->children()) m_allColumns << col->name(); } QString GeneralTest::testName() { return m_currTestName; } QString GeneralTest::statsTable() { return m_statsTable; } QVBoxLayout* GeneralTest::summaryLayout() { return m_summaryLayout; } void GeneralTest::setColumns(QStringList cols) { m_columns.clear(); Column* column = new Column("column"); for (QString col : cols) { if (!cols.isEmpty()) { column = m_dataSourceSpreadsheet->column(col); m_columns.append(column); } } delete column; } void GeneralTest::setColumns(const QVector &cols) { m_columns = cols; } 
/******************************************************************************************************************** * Protected functions implementations [Helper Functions] ********************************************************************************************************************/ +int GeneralTest::testType(int test) { + return test & 0x0F; +} + +int GeneralTest::testSubtype(int test) { + return test & 0xF0; +} //TODO: we should implement or use a general round method QString GeneralTest::round(QVariant number, int precision) { if (number.userType() == QMetaType::Double || number.userType() == QMetaType::Float) { double multiplierPrecision = gsl_pow_int(10, precision); int tempNum = int(number.toDouble()*multiplierPrecision*10); if (tempNum % 10 < 5) return QString::number((tempNum/10) / multiplierPrecision); else return QString::number((tempNum/10 + 1) / multiplierPrecision); } return i18n("%1", number.toString()); } //TODO: Doesn't Column already have a function for this? int GeneralTest::findCount(const Column *column) { int N = column->rowCount(); switch (column->columnMode()) { case (AbstractColumn::Numeric): case (AbstractColumn::Integer): { for (int i = 0; i < N; i++) if (std::isnan(column->valueAt(i))) { N = i; break; } break; } case (AbstractColumn::Month): case (AbstractColumn::Day): case (AbstractColumn::Text): { for (int i = 0; i < N; i++) if (column->textAt(i).isEmpty()) { N = i; break; } break; } case (AbstractColumn::DateTime): break; } return N; } // TODO: put into Column double GeneralTest::findSum(const Column *column, int N) { if (!column->isNumeric()) return 0; if (N < 0) N = findCount(column); double sum = 0; for (int i = 0; i < N; i++) sum += column->valueAt(i); return sum; } // TODO: put into Column double GeneralTest::findSumSq(const Column *column, int N) { if (!column->isNumeric()) return 0; if (N < 0) N = findCount(column); double sumSq = 0; for (int i = 0; i < N; i++) sumSq += gsl_pow_2(column->valueAt(i)); return sumSq; } // 
TODO: put into Column double GeneralTest::findMean(const Column *column, int N) { if (!column->isNumeric()) return 0; if (N < 0) N = findCount(column); double sum = findSum(column, N); return sum / N; } // TODO: put into Column double GeneralTest::findStd(const Column *column, int N, double mean) { if (!column->isNumeric()) return 0; double std = 0; for (int i = 0; i < N; i++) { double row = column->valueAt(i); std += gsl_pow_2( (row - mean)); } if (N > 1) std = std / (N-1); std = sqrt(std); return std; } // TODO: put into Column double GeneralTest::findStd(const Column *column, int N) { if (!column->isNumeric()) return 0; if (N < 0) N = findCount(column); double mean = findMean(column, N); return findStd(column, N, mean); } GeneralTest::ErrorType GeneralTest::findStats(const Column* column, int& count, double& sum, double& mean, double& std) { count = findCount(column); sum = findSum(column, count); mean = findMean(column, count); std = findStd(column, count, mean); if (count < 1) return GeneralTest::ErrorEmptyColumn; return GeneralTest::NoError; } GeneralTest::ErrorType GeneralTest::findStatsPaired(const Column* column1, const Column* column2, int& count, double& sum, double& mean, double& std) { sum = 0; mean = 0; std = 0; int count1 = column1->rowCount(); int count2 = column2->rowCount(); count = qMin(count1, count2); double cell1, cell2; for (int i = 0; i < count; i++) { cell1 = column1->valueAt(i); cell2 = column2->valueAt(i); if (std::isnan(cell1) || std::isnan(cell2)) { if (std::isnan(cell1) && std::isnan(cell2)) count = i; else return GeneralTest::ErrorUnqualSize; break; } sum += cell1 - cell2; } if (count < 1) return GeneralTest::ErrorEmptyColumn; mean = sum / count; double row; for (int i = 0; i < count; i++) { cell1 = column1->valueAt(i); cell2 = column2->valueAt(i); row = cell1 - cell2; std += gsl_pow_2( (row - mean)); } if (count > 1) std = std / (count-1); std = sqrt(std); return GeneralTest::NoError; } void GeneralTest::countPartitions(Column* 
column, int& np, int& totalRows) { totalRows = column->rowCount(); np = 0; QString cellValue; QMap discoveredCategoricalVar; AbstractColumn::ColumnMode originalColMode = column->columnMode(); column->setColumnMode(AbstractColumn::Text); for (int i = 0; i < totalRows; i++) { cellValue = column->textAt(i); if (cellValue.isEmpty()) { totalRows = i; break; } if (discoveredCategoricalVar[cellValue]) continue; discoveredCategoricalVar[cellValue] = true; np++; } column->setColumnMode(originalColMode); } GeneralTest::ErrorType GeneralTest::findStatsCategorical(Column* column1, Column* column2, int n[], double sum[], double mean[], double std[], QMap& colName, const int& np, const int& totalRows) { Column* columns[] = {column1, column2}; for (int i = 0; i < np; i++) { n[i] = 0; sum[i] = 0; mean[i] = 0; std[i] = 0; } AbstractColumn::ColumnMode originalColMode = columns[0]->columnMode(); columns[0]->setColumnMode(AbstractColumn::Text); int partitionNumber = 1; for (int i = 0; i < totalRows; i++) { QString name = columns[0]->textAt(i); double value = columns[1]->valueAt(i); if (std::isnan(value)) { columns[0]->setColumnMode(originalColMode); return GeneralTest::ErrorUnqualSize; } if (colName[name] == 0) { colName[name] = partitionNumber; partitionNumber++; } n[colName[name]-1]++; sum[colName[name]-1] += value; } for (int i = 0; i < np; i++) mean[i] = sum[i] / n[i]; for (int i = 0; i < totalRows; i++) { QString name = columns[0]->textAt(i); double value = columns[1]->valueAt(i); std[colName[name]-1] += gsl_pow_2( (value - mean[colName[name]-1])); } for (int i = 0; i < np; i++) { if (n[i] > 1) std[i] = std[i] / (n[i] - 1); std[i] = sqrt(std[i]); } columns[0]->setColumnMode(originalColMode); if (columns[0]->isNumeric()) { } return GeneralTest::NoError; } QString GeneralTest::getHtmlTable(int row, int column, QVariant* rowMajor) { if (row < 1 || column < 1) return QString(); QString table; table = "" "" " "; QString bg = "tg-0pky"; bool pky = true; QString element; table += " "; 
for (int j = 0; j < column; j++) { element = rowMajor[j].toString(); table += " "; } table += " "; if (pky) bg = "tg-0pky"; else bg = "tg-btxf"; pky = !pky; for (int i = 1; i < row; i++) { table += " "; QString element = round(rowMajor[i*column]); table += " "; for (int j = 1; j < column; j++) { element = round(rowMajor[i*column+j]); table += " "; } table += " "; if (pky) bg = "tg-0pky"; else bg = "tg-btxf"; pky = !pky; } table += "
" + i18n("%1", element) + "
" + i18n("%1", element) + "" + i18n("%1", element) + "
"; return table; } QString GeneralTest::getHtmlTable3(const QList& rowMajor) { int rowMajorSize = rowMajor.size(); if (rowMajorSize == 0) return QString(); QString startToolTip = "[tooltip]"; QString endToolTip = "[/tooltip]"; QString startData = "[data]"; QString endData = "[/data]"; QString startTip = "[tip]"; QString endTip = "[/tip]"; QString table; table = ""; table += ""; table += " "; int prevLevel = 0; for (int i = 0; i < rowMajorSize; i++) { Cell* currCell = rowMajor[i]; if (currCell->level != prevLevel) { table += " "; table += " "; prevLevel = currCell->level; } QString cellStartTag = ""; table += "
isHeader) { cellStartTag = "data); if (!currCell->tooltip.isEmpty()) cellData = startToolTip+ startData+cellData+endData+ startTip+i18n("%1", currCell->tooltip)+endTip+ endToolTip; table += cellStartTag + "rowspan=" + QString::number(currCell->rowSpanCount) + " " + "colspan=" + QString::number(currCell->columnSpanCount) + ">" + cellData + cellEndTag; } table += "
"; return table; } QString GeneralTest::getLine(const QString& msg, const QString& color) { return "

" + i18n("%1", msg) + "

"; } void GeneralTest::printLine(const int& index, const QString& msg, const QString& color) { if (index < 0 || index >= 10) return; m_resultLine[index]->setText(getLine(msg, color)); return; } void GeneralTest::printTooltip(const int &index, const QString &msg) { if (index < 0 || index >= 10) return; m_resultLine[index]->setToolTip(i18n("%1", msg)); } void GeneralTest::printError(const QString& errorMsg) { printLine(0, errorMsg, "red"); } /******************************************************************************************************************** * virtual functions implementations ********************************************************************************************************************/ /*! Saves as XML. */ void GeneralTest::save(QXmlStreamWriter* writer) const { writer->writeStartElement("GeneralTest"); writeBasicAttributes(writer); writeCommentElement(writer); writer->writeEndElement(); } /*! Loads from XML. */ bool GeneralTest::load(XmlStreamReader* reader, bool preview) { Q_UNUSED(preview); if (!readBasicAttributes(reader)) return false; return !reader->hasError(); } Spreadsheet *GeneralTest::dataSourceSpreadsheet() const { return m_dataSourceSpreadsheet; } bool GeneralTest::exportView() const { return true; } bool GeneralTest::printView() { return true; } bool GeneralTest::printPreview() const { return true; } /*! Constructs a primary view on me. This method may be called multiple times during the life time of an Aspect, or it might not get called at all. Aspects must not depend on the existence of a view for their operation. */ //QWidget* GeneralTest::view() const { // if (!m_partView) { // m_view = new HypothesisTestView(const_cast(this)); // m_partView = m_view; // } // return m_partView; //} /*! Returns a new context menu. The caller takes ownership of the menu. 
*/ QMenu* GeneralTest::createContextMenu() { QMenu* menu = AbstractPart::createContextMenu(); // Q_ASSERT(menu); // emit requestProjectContextMenu(menu); return menu; } diff --git a/src/backend/generalTest/GeneralTest.h b/src/backend/generalTest/GeneralTest.h index 408bc8246..0b1ce6a6d 100644 --- a/src/backend/generalTest/GeneralTest.h +++ b/src/backend/generalTest/GeneralTest.h @@ -1,143 +1,146 @@ /*************************************************************************** File : GeneralTest.h Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef GENERALTEST_H #define GENERALTEST_H #include "backend/core/AbstractPart.h" #include "backend/lib/macros.h" #include "kdefrontend/generalTest/GeneralTestView.h" class Spreadsheet; class QString; class Column; class QVBoxLayout; class QLabel; class GeneralTest : public AbstractPart { Q_OBJECT public: explicit GeneralTest(const QString& name, const AspectType& type); ~GeneralTest() override; enum DataSourceType {DataSourceSpreadsheet, DataSourceDatabase}; struct Cell { QString data; int level; bool isHeader; QString tooltip; int rowSpanCount; int columnSpanCount; Cell(QVariant data = "", int level = 0, bool isHeader = false, QString tooltip = "", int rowSpanCount = 1, int columnSpanCount = 1) { this->data = data.toString(); this->level = level; this->isHeader = isHeader; this->tooltip = tooltip; this->rowSpanCount = rowSpanCount; this->columnSpanCount = columnSpanCount; } }; enum ErrorType {ErrorUnqualSize, ErrorEmptyColumn, NoError}; void setDataSourceType(DataSourceType type); DataSourceType dataSourceType() const; void setDataSourceSpreadsheet(Spreadsheet* spreadsheet); Spreadsheet* dataSourceSpreadsheet() const; void setColumns(const QVector& cols); void setColumns(QStringList cols); QStringList allColumns(); QString testName(); QString statsTable(); QVBoxLayout* summaryLayout(); //virtual methods // QIcon icon() const override; QMenu* createContextMenu() override; // QWidget* view() const override; bool exportView() const override; bool printView() override; bool printPreview() const override; void save(QXmlStreamWriter*) const override; bool load(XmlStreamReader*, bool preview) override; signals: void changed(); void requestProjectContextMenu(QMenu*); 
void dataSourceTypeChanged(GeneralTest::DataSourceType); void dataSourceSpreadsheetChanged(Spreadsheet*); protected: DataSourceType m_dataSourceType{GeneralTest::DataSourceSpreadsheet}; Spreadsheet* m_dataSourceSpreadsheet{nullptr}; QVector m_columns; QStringList m_allColumns; QString m_currTestName; QString m_statsTable; QVBoxLayout* m_summaryLayout{nullptr}; QLabel* m_resultLine[RESULTLINESCOUNT]; + int testType(int test); + int testSubtype(int test); + QString round(QVariant number, int precision = 3); int findCount(const Column* column); double findSum(const Column* column, int N = -1); double findSumSq(const Column* column, int N = -1); double findMean(const Column* column, int N = -1); double findStd(const Column* column, int N, double mean); double findStd(const Column* column, int N = -1); void countPartitions(Column* column, int& np, int& totalRows); // double findSumProducts(const Column* columns[], int N = -1); ErrorType findStats(const Column* column,int& count, double& sum, double& mean, double& std); ErrorType findStatsPaired(const Column* column1, const Column* column2, int& count, double& sum, double& mean, double& std); ErrorType findStatsCategorical(Column* column1, Column* column2, int n[], double sum[], double mean[], double std[], QMap& colName, const int& np, const int& totalRows); QString getHtmlTable(int row, int column, QVariant* rowMajor); QString getHtmlTable3(const QList& rowMajor); QString getLine(const QString& msg, const QString& color = "black"); void printLine(const int& index, const QString& msg, const QString& color = "black"); void printTooltip(const int& index, const QString& msg); void printError(const QString& errorMsg); bool m_dbCreated{false}; mutable GeneralTestView* m_view{nullptr}; }; #endif // GeneralTest_H diff --git a/src/backend/generalTest/HypothesisTest.cpp b/src/backend/generalTest/HypothesisTest.cpp index b5771af86..2ffb99d7c 100644 --- a/src/backend/generalTest/HypothesisTest.cpp +++ 
b/src/backend/generalTest/HypothesisTest.cpp @@ -1,1145 +1,1127 @@ /*************************************************************************** File : HypothesisTest.cpp Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "HypothesisTest.h" #include "kdefrontend/generalTest/HypothesisTestView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" #include #include #include #include #include #include #include #include #include #include #include extern "C" { #include "backend/nsl/nsl_stats.h" } HypothesisTest::HypothesisTest(const QString &name) : GeneralTest(name, AspectType::HypothesisTest) { } HypothesisTest::~HypothesisTest() { } void HypothesisTest::setPopulationMean(QVariant populationMean) { m_populationMean = populationMean.toDouble(); } void HypothesisTest::setSignificanceLevel(QVariant alpha) { m_significanceLevel = alpha.toDouble(); } -void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) { - m_tailType = test.tail; +void HypothesisTest::setTail(Tail tail) { + m_tail = tail; +} + +void HypothesisTest::performTest(int test, bool categoricalVariable, bool equalVariance) { m_pValue.clear(); m_statisticValue.clear(); m_statsTable = ""; for (int i = 0; i < RESULTLINESCOUNT; i++) m_resultLine[i]->clear(); - switch (test.subtype) { - case HypothesisTest::Test::SubType::TwoSampleIndependent: { - m_currTestName = "

" + i18n("Two Sample Independent Test") + "

"; - performTwoSampleIndependentTest(test.type, categoricalVariable, equalVariance); - break; - } - case HypothesisTest::Test::SubType::TwoSamplePaired: + switch (testSubtype(test)) { + case TwoSampleIndependent: { + m_currTestName = "

" + i18n("Two Sample Independent Test") + "

"; + performTwoSampleIndependentTest(testType(test), categoricalVariable, equalVariance); + break; + } + case TwoSamplePaired: m_currTestName = "

" + i18n("Two Sample Paired Test") + "

"; - performTwoSamplePairedTest(test.type); + performTwoSamplePairedTest(testType(test)); break; - case HypothesisTest::Test::SubType::OneSample: { - m_currTestName = "

" + i18n("One Sample Test") + "

"; - performOneSampleTest(test.type); - break; - } - case HypothesisTest::Test::SubType::OneWay: { - m_currTestName = "

" + i18n("One Way Anova") + "

"; - performOneWayAnova(); - break; - } - case HypothesisTest::Test::SubType::TwoWay: { - m_currTestName = "

" + i18n("Two Way Anova") + "

"; - performTwoWayAnova(); - break; - } - case HypothesisTest::Test::SubType::NoneSubType: + case OneSample: { + m_currTestName = "

" + i18n("One Sample Test") + "

"; + performOneSampleTest(testType(test)); break; } + case OneWay: { + m_currTestName = "

" + i18n("One Way Anova") + "

"; + performOneWayAnova(); + break; + } + case TwoWay: { + m_currTestName = "

" + i18n("Two Way Anova") + "

"; + performTwoWayAnova(); + break; + } + } emit changed(); } void HypothesisTest::performLeveneTest(bool categoricalVariable) { m_pValue.clear(); m_statisticValue.clear(); m_statsTable = ""; for (int i = 0; i < RESULTLINESCOUNT; i++) m_resultLine[i]->clear(); m_currTestName = "

" + i18n("Levene Test for Equality of Variance") + "

"; m_performLeveneTest(categoricalVariable); emit changed(); } QList& HypothesisTest::statisticValue() { return m_statisticValue; } QList& HypothesisTest::pValue() { return m_pValue; } /****************************************************************************** * Private Implementations * ****************************************************************************/ //TODO: backend of z test; -//TODO: add tooltip to tables. (currently it is not possible to use with QTextDocument); //TODO: use https://www.gnu.org/software/gsl/doc/html/statistics.html for basic statistic calculations /**************************Two Sample Independent *************************************/ -void HypothesisTest::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) { +void HypothesisTest::performTwoSampleIndependentTest(int test, bool categoricalVariable, bool equalVariance) { if (m_columns.size() != 2) { printError("Inappropriate number of m_columns selected"); return; } int n[2]; double sum[2], mean[2], std[2]; QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); if (!categoricalVariable && m_columns[0]->isNumeric()) { for (int i = 0; i < 2; i++) { findStats(m_columns[i], n[i], sum[i], mean[i], std[i]); if (n[i] == 0) { printError("At least two values should be there in every column"); return; } if (std[i] == 0.0) { printError(i18n("Standard Deviation of at least one column is equal to 0: last column is: %1", m_columns[i]->name())); return; } } } else { QMap colName; QString baseColName; int np; int totalRows; countPartitions(m_columns[0], np, totalRows); if (np != 2) { printError( i18n("Number of Categorical Variable in Column %1 is not equal to 2", m_columns[0]->name())); return; } if (m_columns[0]->isNumeric()) baseColName = m_columns[0]->name(); ErrorType errorCode = findStatsCategorical(m_columns[0], m_columns[1], n, sum, mean, std, colName, np, totalRows); switch (errorCode) { case 
ErrorUnqualSize: { - printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name())); - return; - } + printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name())); + return; + } case ErrorEmptyColumn: { - printError("At least one of selected column is empty"); - - return; - } + printError("At least one of selected column is empty"); + return; + } case NoError: break; } QMapIterator i(colName); while (i.hasNext()) { i.next(); if (i.value() == 1) col1Name = baseColName + " " + i.key(); else col2Name = baseColName + " " + i.key(); } } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", - col1Name, n[0], sum[0], mean[0], std[0], - col2Name, n[1], sum[1], mean[1], std[1] - }; + col1Name, n[0], sum[0], mean[0], std[0], + col2Name, n[1], sum[1], mean[1], std[1] + }; m_statsTable = getHtmlTable(3, 5, rowMajor); for (int i = 0; i < 2; i++) { if (n[i] == 0) { printError("At least two values should be there in every column"); return; } if (std[i] == 0.0) { printError( i18n("Standard Deviation of at least one column is equal to 0: last column is: %1", m_columns[i]->name())); return; } } double stdSq[2]; stdSq[0] = gsl_pow_2(std[0]); stdSq[1] = gsl_pow_2(std[1]); QString testName; int df = 0; double spSq = 0; - switch (test) { - case HypothesisTest::Test::Type::TTest: { - testName = "T"; - - if (equalVariance) { - df = n[0] + n[1] - 2; - - spSq = ((n[0]-1) * stdSq[0] + - (n[1]-1) * stdSq[1] ) / df; - QDEBUG("equal variance : spSq is " << spSq); - m_statisticValue.append((mean[0] - mean[1]) / sqrt(spSq / n[0] + spSq / n[1])); - printLine(9, "Assumption: Equal Variance b/w both population means"); - } else { - double temp_val; - temp_val = gsl_pow_2( gsl_pow_2(std[0]) / n[0] + gsl_pow_2(std[1]) / n[1]); - temp_val = temp_val / ( (gsl_pow_2( (gsl_pow_2(std[0]) / n[0]) ) / (n[0]-1)) + - (gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1))); - df = qRound(temp_val); - - 
m_statisticValue.append((mean[0] - mean[1]) / (sqrt( (gsl_pow_2(std[0])/n[0]) + - (gsl_pow_2(std[1])/n[1])))); - printLine(9, "Assumption: UnEqual Variance b/w both population means"); - } + switch (testType(test)) { + case TTest: { + testName = "T"; - printLine(8, "Assumption: Both Populations approximately follow normal distribution"); - break; - } - case HypothesisTest::Test::Type::ZTest: { - testName = "Z"; - spSq = ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df; + if (equalVariance) { + df = n[0] + n[1] - 2; + + spSq = ((n[0]-1) * stdSq[0] + + (n[1]-1) * stdSq[1] ) / df; + // QDEBUG("equal variance : spSq is " << spSq); m_statisticValue.append((mean[0] - mean[1]) / sqrt(spSq / n[0] + spSq / n[1])); - // m_pValue.append(gsl_cdf_gaussian_P(m_statisticValue, sp)); - break; + printLine(9, "Assumption: Equal Variance b/w both population means"); + } else { + double temp_val; + temp_val = gsl_pow_2( gsl_pow_2(std[0]) / n[0] + gsl_pow_2(std[1]) / n[1]); + temp_val = temp_val / ( (gsl_pow_2( (gsl_pow_2(std[0]) / n[0]) ) / (n[0]-1)) + + (gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1))); + df = qRound(temp_val); + + m_statisticValue.append((mean[0] - mean[1]) / (sqrt( (gsl_pow_2(std[0])/n[0]) + + (gsl_pow_2(std[1])/n[1])))); + printLine(9, "Assumption: UnEqual Variance b/w both population means"); } - case HypothesisTest::Test::Type::Anova: - case HypothesisTest::Test::Type::NoneType: + + printLine(8, "Assumption: Both Populations approximately follow normal distribution"); + break; + } + case ZTest: { + testName = "Z"; + spSq = ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df; + m_statisticValue.append((mean[0] - mean[1]) / sqrt(spSq / n[0] + spSq / n[1])); + // m_pValue.append(gsl_cdf_gaussian_P(m_statisticValue, sp)); break; } + } m_currTestName = "

" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "

"; m_pValue.append(getPValue(test, m_statisticValue[0], col1Name, col2Name, (mean[0] - mean[1]), sqrt(spSq), df)); printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue"); printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green"); printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName)); printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate")); if (m_pValue[0] <= m_significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(m_significanceLevel))); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /********************************Two Sample Paired ***************************************/ -void HypothesisTest::performTwoSamplePairedTest(HypothesisTest::Test::Type test) { +void HypothesisTest::performTwoSamplePairedTest(int test) { if (m_columns.size() != 2) { printError("Inappropriate number of m_columns selected"); return; } for (int i = 0; i < 2; i++) { if ( !m_columns[0]->isNumeric()) { printError("select only m_columns with numbers"); return; } } int n; double sum, mean, std; ErrorType errorCode = findStatsPaired(m_columns[0], m_columns[1], n, sum, mean, std); switch (errorCode) { case ErrorUnqualSize: { - printError("both m_columns are having different sizes"); + printError("both m_columns are having different sizes"); - return; - } + return; + } case ErrorEmptyColumn: { - printError("m_columns are empty"); - return; - } + printError("m_columns are empty"); + return; + } case NoError: break; } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", - "difference", n, sum, mean, std - }; + "difference", n, sum, mean, std + }; m_statsTable = getHtmlTable(2, 5, rowMajor); if (std == 0.0) { 
printError("Standard deviation of the difference is 0"); return; } QString testName; int df = 0; switch (test) { - case HypothesisTest::Test::Type::TTest: { - m_statisticValue[0] = mean / (std / sqrt(n)); - df = n - 1; - testName = "T"; - printLine(6, i18n("Degree of Freedom is %1name(), i18n("%1", m_populationMean), mean, std, df)); m_currTestName = "

" + i18n("One Sample %1 Test for %2 vs %3", testName, m_columns[0]->name(), m_columns[1]->name()) + "

"; printLine(2, i18n("Significance level is %1 ", round(m_significanceLevel)), "blue"); printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green"); printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green"); if (m_pValue[0] <= m_significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /******************************** One Sample ***************************************/ -void HypothesisTest::performOneSampleTest(HypothesisTest::Test::Type test) { +void HypothesisTest::performOneSampleTest(int test) { if (m_columns.size() != 1) { printError("Inappropriate number of m_columns selected"); - return; } if ( !m_columns[0]->isNumeric()) { printError("select only m_columns with numbers"); - return; } int n; double sum, mean, std; ErrorType errorCode = findStats(m_columns[0], n, sum, mean, std); switch (errorCode) { case ErrorEmptyColumn: { - printError("column is empty"); - return; - } + printError("column is empty"); + return; + } case NoError: break; case ErrorUnqualSize: { - return; - } + return; + } } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", - m_columns[0]->name(), n, sum, mean, std - }; + m_columns[0]->name(), n, sum, mean, std + }; m_statsTable = getHtmlTable(2, 5, rowMajor); if (std == 0.0) { printError("Standard deviation is 0"); return; } - QString testName; int df = 0; switch (test) { - case HypothesisTest::Test::Type::TTest: { - testName = "T"; - m_statisticValue.append((mean - m_populationMean) / (std / sqrt(n))); - df = n - 1; - printLine(6, i18n("Degree of Freedom is %1", df), "blue"); - break; - } - case HypothesisTest::Test::Type::ZTest: { - testName = "Z"; - df = 0; - m_statisticValue.append((mean - m_populationMean) / (std / sqrt(n))); - break; - } - case HypothesisTest::Test::Type::Anova: - case HypothesisTest::Test::Type::NoneType: + case TTest: { + 
testName = "T"; + m_statisticValue.append((mean - m_populationMean) / (std / sqrt(n))); + df = n - 1; + printLine(6, i18n("Degree of Freedom is %1", df), "blue"); + break; + } + case ZTest: { + testName = "Z"; + df = 0; + m_statisticValue.append((mean - m_populationMean) / (std / sqrt(n))); break; } + } m_pValue.append(getPValue(test, m_statisticValue[0], m_columns[0]->name(), i18n("%1",m_populationMean), mean - m_populationMean, std, df)); m_currTestName = "

" + i18n("One Sample %1 Test for %2", testName, m_columns[0]->name()) + "

"; printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue"); printLine(4, i18n("%1 Value is %2", testName, round(m_statisticValue[0])), "green"); printLine(5, i18n("P Value is %1", m_pValue[0]), "green"); if (m_pValue[0] <= m_significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /*************************************One Way Anova***************************************/ // all standard variables and formulas are taken from this wikipedia page: // https://en.wikipedia.org/wiki/One-way_analysis_of_variance // b stands for b/w groups // w stands for within groups // np is number of partition i.e., number of classes void HypothesisTest::performOneWayAnova() { int np, totalRows; countPartitions(m_columns[0], np, totalRows); int* ni = new int[np]; double* sum = new double[np]; double* mean = new double[np]; double* std = new double[np]; QString* colNames = new QString[np]; QMap classnameToIndex; QString baseColName; if (m_columns[0]->isNumeric()) baseColName = m_columns[0]->name(); findStatsCategorical(m_columns[0], m_columns[1], ni, sum, mean, std, classnameToIndex, np, totalRows); double yBar = 0; // overall mean double sB = 0; // sum of squares of (mean - overall_mean) between the groups int fB = 0; // degree of freedom between the groups double msB = 0; // mean sum of squares between the groups double sW = 0; // sum of squares of (value - mean of group) within the groups int fW = 0; // degree of freedom within the group double msW = 0; // mean sum of squares within the groups // now finding mean of each group; for (int i = 0; i < np; i++) yBar += mean[i]; yBar = yBar / np; for (int i = 0; i < np; i++) { sB += ni[i] * gsl_pow_2( ( mean[i] - yBar)); if (ni[i] > 1) sW += gsl_pow_2( std[i])*(ni[i] - 1); else sW += gsl_pow_2( std[i]); fW += ni[i] - 1; } fB = np - 1; msB = sB / 
fB; msW = sW / fW; m_statisticValue.append(msB / msW); m_pValue.append(nsl_stats_fdist_p(m_statisticValue[0], static_cast(np-1), fW)); QMapIterator i(classnameToIndex); while (i.hasNext()) { i.next(); colNames[i.value()-1] = baseColName + " " + i.key(); } // now printing the statistics and result; int rowCount = np + 1, columnCount = 5; QVariant* rowMajor = new QVariant[rowCount*columnCount]; // header data; rowMajor[0] = ""; rowMajor[1] = "Ni"; rowMajor[2] = "Sum"; rowMajor[3] = "Mean"; rowMajor[4] = "Std"; // table data for (int row_i = 1; row_i < rowCount ; row_i++) { rowMajor[row_i*columnCount] = colNames[row_i - 1]; rowMajor[row_i*columnCount + 1] = ni[row_i - 1]; rowMajor[row_i*columnCount + 2] = sum[row_i - 1]; rowMajor[row_i*columnCount + 3] = mean[row_i - 1]; rowMajor[row_i*columnCount + 4] = std[row_i - 1]; } m_statsTable = "

" + i18n("Group Summary Statistics") + "

"; m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor); m_statsTable += getLine(""); m_statsTable += getLine(""); m_statsTable += "

" + i18n("Grand Summary Statistics") + "

"; m_statsTable += getLine(""); m_statsTable += getLine(i18n("Overall Mean is %1", round(yBar))); rowCount = 4; columnCount = 3; rowMajor->clear(); rowMajor[0] = ""; rowMajor[1] = "Between Groups"; rowMajor[2] = "Within Groups"; int baseIndex = 0; baseIndex = 1 * columnCount; rowMajor[baseIndex + 0] = "Sum of Squares"; rowMajor[baseIndex + 1] = sB; rowMajor[baseIndex + 2] = sW; baseIndex = 2 * columnCount; rowMajor[baseIndex + 0] = "Degree of Freedom"; rowMajor[baseIndex + 1] = fB; rowMajor[baseIndex + 2] = fW; baseIndex = 3 * columnCount; rowMajor[baseIndex + 0] = "Mean Square Value"; rowMajor[baseIndex + 1] = msB; rowMajor[baseIndex + 2] = msW; m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor); delete[] ni; delete[] sum; delete[] mean; delete[] std; delete[] colNames; printLine(1, i18n("F Value is %1", round(m_statisticValue[0])), "green"); printLine(2, i18n("P Value is %1 ", m_pValue[0]), "green"); if (m_pValue[0] <= m_significanceLevel) printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); else printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /*************************************Two Way Anova***************************************/ // all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf //TODO: suppress warning of variable length array are a C99 feature. 
//TODO: add assumptions verification option //TODO: add tail option (if needed) void HypothesisTest::performTwoWayAnova() { int np_a, totalRows_a; int np_b, totalRows_b; countPartitions(m_columns[0], np_a, totalRows_a); countPartitions(m_columns[1], np_b, totalRows_b); QVector> groupMean(np_a, QVector(np_b)); QVector> replicates(np_a, QVector(np_b)); for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) { groupMean[i][j] = 0; replicates[i][j] = 0; } if (totalRows_a != totalRows_b) { printError("There is missing data in at least one of the rows"); return; } QMap catToNumber_a; QMap catToNumber_b; int partitionNumber_a = 1; int partitionNumber_b = 1; for (int i = 0; i < totalRows_a; i++) { QString name_a = m_columns[0]->textAt(i); QString name_b = m_columns[1]->textAt(i); double value = m_columns[2]->valueAt(i); if (catToNumber_a[name_a] == 0) { catToNumber_a[name_a] = partitionNumber_a; partitionNumber_a++; } if (catToNumber_b[name_b] == 0) { catToNumber_b[name_b] = partitionNumber_b; partitionNumber_b++; } groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += value; replicates[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += 1; } int replicate = replicates[0][0]; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) { if (replicates[i][j] == 0) { printError("Dataset should have at least one data value corresponding to each feature combination"); return; } if (replicates[i][j] != replicate) { printError("Number of experiments perfomed for each combination of levels
" - "between Independet Var.1 and Independent Var.2 must be equal"); + "between Independet Var.1 and Independent Var.2 must be equal"); return; } groupMean[i][j] /= replicates[i][j]; } double ss_within = 0; for (int i = 0; i < totalRows_a; i++) { QString name_a = m_columns[0]->textAt(i); QString name_b = m_columns[1]->textAt(i); double value = m_columns[2]->valueAt(i); ss_within += gsl_pow_2(value - groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1]); } int df_within = (replicate - 1) * np_a * np_b; double ms_within = ss_within / df_within; double* mean_a = new double[np_a]; double* mean_b = new double[np_b]; for (int i = 0; i < np_a; i++) { for (int j = 0; j < np_b; j++) { mean_a[i] += groupMean[i][j] / np_b; mean_b[j] += groupMean[i][j] / np_a; } } double mean = 0; for (int i = 0; i < np_a; i++) mean += mean_a[i] / np_a; double ss_a = 0; for (int i = 0; i < np_a; i++) ss_a += gsl_pow_2(mean_a[i] - mean); ss_a *= replicate * np_b; int df_a = np_a - 1; double ms_a = ss_a / df_a; double ss_b = 0; for (int i = 0; i < np_b; i++) ss_b += gsl_pow_2(mean_b[i] - mean); ss_b *= replicate * np_a; int df_b = np_b - 1; double ms_b = ss_b / df_b; double ss_interaction = 0; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean); ss_interaction *= replicate; int df_interaction = (np_a - 1) * (np_b - 1); double ms_interaction = ss_interaction / df_interaction; QString* partitionNames_a = new QString[np_a]; QString* partitionNames_b = new QString[np_b]; QMapIterator itr_a(catToNumber_a); while (itr_a.hasNext()) { itr_a.next(); partitionNames_a[itr_a.value()-1] = itr_a.key(); } QMapIterator itr_b(catToNumber_b); while (itr_b.hasNext()) { itr_b.next(); partitionNames_b[itr_b.value()-1] = itr_b.key(); } // printing table; // cell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader; QList rowMajor; rowMajor.append(new Cell("", 0, true, "", 2, 1)); for (int i = 0; i 
< np_b; i++) rowMajor.append(new Cell(partitionNames_b[i], 0, true, "", 1, 2)); rowMajor.append(new Cell("Mean", 0, true, "", 2)); for (int i = 0; i < np_b; i++) { rowMajor.append(new Cell("Mean", 1, true)); rowMajor.append(new Cell("Replicate", 1, true)); } int level = 2; for (int i = 0; i < np_a; i++) { rowMajor.append(new Cell(partitionNames_a[i], level, true)); for (int j = 0; j < np_b; j++) { rowMajor.append(new Cell(round(groupMean[i][j]), level)); rowMajor.append(new Cell(replicates[i][j], level)); } rowMajor.append(new Cell(round(mean_a[i]), level)); level++; } rowMajor.append(new Cell("Mean", level, true)); for (int i = 0; i < np_b; i++) rowMajor.append(new Cell(round(mean_b[i]), level, false, "", 1, 2)); rowMajor.append(new Cell(round(mean), level)); m_statsTable = "

" + i18n("Contingency Table") + "

"; m_statsTable += getHtmlTable3(rowMajor); m_statsTable += "
"; m_statsTable += "

" + i18n("results table") + "

"; rowMajor.clear(); level = 0; rowMajor.append(new Cell("", level, true)); rowMajor.append(new Cell("SS", level, true)); rowMajor.append(new Cell("DF", level, true, "degree of freedom")); rowMajor.append(new Cell("MS", level, true)); level++; rowMajor.append(new Cell(m_columns[0]->name(), level, true)); rowMajor.append(new Cell(round(ss_a), level)); rowMajor.append(new Cell(df_a, level)); rowMajor.append(new Cell(round(ms_a), level)); level++; rowMajor.append(new Cell(m_columns[1]->name(), level, true)); rowMajor.append(new Cell(round(ss_b), level)); rowMajor.append(new Cell(df_b, level)); rowMajor.append(new Cell(round(ms_b), level)); level++; rowMajor.append(new Cell("Interaction", level, true)); rowMajor.append(new Cell(round(ss_interaction), level)); rowMajor.append(new Cell(df_interaction, level)); rowMajor.append(new Cell(round(ms_interaction), level)); level++; rowMajor.append(new Cell("Within", level, true)); rowMajor.append(new Cell(round(ss_within), level)); rowMajor.append(new Cell(df_within, level)); rowMajor.append(new Cell(round(ms_within), level)); m_statsTable += getHtmlTable3(rowMajor); double fValue_a = ms_a / ms_within; double fValue_b = ms_b / ms_within; double fValue_interaction = ms_interaction / ms_within; double m_pValue_a = nsl_stats_fdist_p(fValue_a, static_cast(np_a - 1), df_a); double m_pValue_b = nsl_stats_fdist_p(fValue_b, static_cast(np_b - 1), df_b); printLine(0, "F(df" + m_columns[0]->name() + ", dfwithin) is " + round(fValue_a), "blue"); printLine(1, "F(df" + m_columns[1]->name() + ", dfwithin) is " + round(fValue_b), "blue"); printLine(2, "F(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); printLine(4, "P(df" + m_columns[0]->name() + ", dfwithin) is " + round(m_pValue_a), "blue"); printLine(5, "P(df" + m_columns[1]->name() + ", dfwithin) is " + round(m_pValue_b), "blue"); // printLine(2, "P(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); m_statisticValue.append(fValue_a); 
m_statisticValue.append(fValue_b); m_statisticValue.append(fValue_interaction); m_pValue.append(m_pValue_a); m_pValue.append(m_pValue_b); delete[] mean_a; delete[] mean_b; delete[] partitionNames_a; delete[] partitionNames_b; return; } /**************************************Levene Test****************************************/ // Some reference to local variables. // np = number of partitions // df = degree of fredom // totalRows = total number of rows in column // these variables are taken from: https://en.wikipedia.org/wiki/Levene%27s_test // yiBar = mean of ith group; // Zij = |Yij - yiBar| // ziBar = mean of Zij for group i // ziBarBar = mean for all zij // ni = number of elements in group i void HypothesisTest::m_performLeveneTest(bool categoricalVariable) { if (m_columns.size() != 2) { printError("Inappropriate number of m_columns selected"); return; } int np = 0; int n = 0; if (!categoricalVariable && m_columns[0]->isNumeric()) np = m_columns.size(); else countPartitions(m_columns[0], np, n); if (np < 2) { printError("Select at least two m_columns / classes"); return; } double* yiBar = new double[np]; double* ziBar = new double[np]; double ziBarBar = 0; double* ni = new double[np]; for (int i = 0; i < np; i++) { yiBar[i] = 0; ziBar[i] = 0; ni[i] = 0; } double fValue; int df = 0; int totalRows = 0; QString* colNames = new QString[np]; if (!categoricalVariable && m_columns[0]->isNumeric()) { totalRows = m_columns[0]->rowCount(); double value = 0; for (int j = 0; j < totalRows; j++) { int numberNaNCols = 0; for (int i = 0; i < np; i++) { value = m_columns[i]->valueAt(j); if (std::isnan(value)) { numberNaNCols++; continue; } yiBar[i] += value; ni[i]++; n++; } if (numberNaNCols == np) { totalRows = j; break; } } for (int i = 0; i < np; i++) { if (ni[i] > 0) yiBar[i] = yiBar[i] / ni[i]; else { printError("One of the selected m_columns is empty
" - "or have choosen Independent Var.1 wrongly"); + "or have choosen Independent Var.1 wrongly"); return; } } for (int j = 0; j < totalRows; j++) { for (int i = 0; i < np; i++) { value = m_columns[i]->valueAt(j); if (!(std::isnan(value))) ziBar[i] += fabs(value - yiBar[i]); } } for (int i = 0; i < np; i++) { ziBarBar += ziBar[i]; if (ni[i] > 0) ziBar[i] = ziBar[i] / ni[i]; } ziBarBar = ziBarBar / n; double numberatorValue = 0; double denominatorValue = 0; for (int j = 0; j < totalRows; j++) { for (int i = 0; i < np; i++) { value = m_columns[i]->valueAt(j); if (!(std::isnan(value))) { double zij = fabs(value - yiBar[i]); denominatorValue += gsl_pow_2( (zij - ziBar[i])); } } } if (denominatorValue == 0.0) { printError( i18n("Denominator value is %1", denominatorValue)); return; } for (int i = 0; i < np; i++) { colNames[i] = m_columns[i]->name(); numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar)); } fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue); } else { QMap classnameToIndex; AbstractColumn::ColumnMode originalColMode = m_columns[0]->columnMode(); m_columns[0]->setColumnMode(AbstractColumn::Text); int partitionNumber = 1; QString name; double value; int classIndex; for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); if (std::isnan(value)) { n = j; break; } if (classnameToIndex[name] == 0) { classnameToIndex[name] = partitionNumber; partitionNumber++; } classIndex = classnameToIndex[name]-1; ni[classIndex]++; yiBar[classIndex] += value; } for (int i = 0; i < np; i++) { if (ni[i] > 0) yiBar[i] = yiBar[i] / ni[i]; else { printError("One of the selected m_columns is empty
" - "or have choosen Independent Var.1 wrongly"); + "or have choosen Independent Var.1 wrongly"); m_columns[0]->setColumnMode(originalColMode); return; } } for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); classIndex = classnameToIndex[name] - 1; ziBar[classIndex] += fabs(value - yiBar[classIndex]); } for (int i = 0; i < np; i++) { ziBarBar += ziBar[i]; ziBar[i] = ziBar[i] / ni[i]; } ziBarBar = ziBarBar / n; double numberatorValue = 0; double denominatorValue = 0; for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); classIndex = classnameToIndex[name] - 1; double zij = fabs(value - yiBar[classIndex]); denominatorValue += gsl_pow_2( (zij - ziBar[classIndex])); } for (int i = 0; i < np; i++) numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar)); if (denominatorValue == 0.0) { printError( "number of data points is less or than equal to number of categorical variables"); m_columns[0]->setColumnMode(originalColMode); return; } fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue); QMapIterator i(classnameToIndex); while (i.hasNext()) { i.next(); colNames[i.value()-1] = m_columns[0]->name() + " " + i.key(); } m_columns[0]->setColumnMode(originalColMode); } df = n - np; // now making the stats table. 
int rowCount = np+1; int columnCount = 4; QVariant* rowMajor = new QVariant[rowCount*columnCount]; // header data; rowMajor[0] = ""; rowMajor[1] = "Ni"; rowMajor[2] = "yiBar"; rowMajor[3] = "ziBar"; // table data for (int row_i = 1; row_i < rowCount; row_i++) { rowMajor[row_i*columnCount] = colNames[row_i-1]; rowMajor[row_i*columnCount + 1] = ni[row_i-1]; rowMajor[row_i*columnCount + 2] = yiBar[row_i-1]; rowMajor[row_i*columnCount + 3] = ziBar[row_i-1]; } m_statsTable = getHtmlTable(rowCount, columnCount, rowMajor); delete[] rowMajor; delete[] yiBar; delete[] ziBar; delete[] ni; m_pValue.append(nsl_stats_fdist_p(fValue, static_cast(np-1), df)); printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue"); printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue"); printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue"); printLine(4, i18n("F Value is %1 ", round(fValue)), "green"); printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); if (m_pValue[0] <= m_significanceLevel) { printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); printLine(8, "Requirement for homogeneity is not met", "red"); } else { printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); printLine(8, "Requirement for homogeneity is met", "green"); } m_statisticValue.append(fValue); return; } //TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol; // TODO: check for correctness between: for TestZ with TailTwo // m_pValue.append(2*gsl_cdf_tdist_P(value, df) v/s // m_pValue.append(gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df); -double HypothesisTest::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) 
{ +double HypothesisTest::getPValue(const int& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) { switch (test) { - case HypothesisTest::Test::Type::TTest: { - switch (m_tailType) { - case HypothesisTest::Test::Tail::Negative: { - m_pValue.append(gsl_cdf_tdist_P(value, df)); - printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); - printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); - break; - } - case HypothesisTest::Test::Tail::Positive: { - value *= -1; - m_pValue.append(gsl_cdf_tdist_P(value, df)); - printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); - printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); - break; - } - case HypothesisTest::Test::Tail::Two: { - m_pValue.append(2.*gsl_cdf_tdist_P(-fabs(value), df)); + case TTest: { + switch (m_tail) { + case Negative: { + m_pValue.append(gsl_cdf_tdist_P(value, df)); + printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); + printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); + break; + } + case Positive: { + value *= -1; + m_pValue.append(gsl_cdf_tdist_P(value, df)); + printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); + printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); + break; + } + case Two: { + m_pValue.append(2.*gsl_cdf_tdist_P(-fabs(value), df)); - printLine(0, i18n("Null 
Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue"); - printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); - break; - } - } + printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue"); + printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); break; } - case HypothesisTest::Test::Type::ZTest: { - switch (m_tailType) { - case HypothesisTest::Test::Tail::Negative: { - m_pValue.append(gsl_cdf_gaussian_P(value - mean, sp)); - printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); - printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); - break; - } - case HypothesisTest::Test::Tail::Positive: { - value *= -1; - m_pValue.append(nsl_stats_tdist_p(value - mean, sp)); - printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); - printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); - break; - } - case HypothesisTest::Test::Tail::Two: { - m_pValue.append(2.*gsl_cdf_gaussian_P(value - mean, sp)); - printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue"); - printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); - break; - } - } + } + break; + } + case ZTest: { + switch (m_tail) { + case Negative: { + m_pValue.append(gsl_cdf_gaussian_P(value - mean, sp)); + printLine(0, i18n("Null Hypothesis: 
Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); + printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); break; } - case HypothesisTest::Test::Type::Anova: - case HypothesisTest::Test::Type::NoneType: + case Positive: { + value *= -1; + m_pValue.append(nsl_stats_tdist_p(value - mean, sp)); + printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); + printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); + break; + } + case Two: { + m_pValue.append(2.*gsl_cdf_gaussian_P(value - mean, sp)); + printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue"); + printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); + break; + } + } break; } + } if (m_pValue[0] > 1) return 1; return m_pValue[0]; } // Virtual functions QWidget* HypothesisTest::view() const { if (!m_partView) { m_view = new HypothesisTestView(const_cast(this)); m_partView = m_view; } return m_partView; } diff --git a/src/backend/generalTest/HypothesisTest.h b/src/backend/generalTest/HypothesisTest.h index 6a1f9d59c..ec3e7b923 100644 --- a/src/backend/generalTest/HypothesisTest.h +++ b/src/backend/generalTest/HypothesisTest.h @@ -1,91 +1,87 @@ /*************************************************************************** File : HypothesisTest.h Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ 
/*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef HYPOTHESISTEST_H #define HYPOTHESISTEST_H #include "GeneralTest.h" class HypothesisTest : public GeneralTest { Q_OBJECT public: explicit HypothesisTest(const QString& name); ~HypothesisTest() override; - struct Test { - enum Type { - NoneType = 0, - TTest = 1 << 0, - ZTest = 1 << 1, - Anova = 1 << 2 - }; - enum SubType { - NoneSubType = 0, - TwoSampleIndependent = 1 << 0, - TwoSamplePaired = 1 << 1, - OneSample = 1 << 2, - OneWay = 1 << 3, - TwoWay = 1 << 4 - }; - enum Tail {Positive, Negative, Two}; - Type type = NoneType; - SubType subtype = NoneSubType; - Tail tail; + enum Test { + // Type + TTest = 0x01, + ZTest = 0x02, + Anova = 0x03, + // SubType + TwoSampleIndependent = 0x10, + TwoSamplePaired = 0x20, + OneSample = 0x30, + OneWay = 0x40, + TwoWay = 0x50 }; + enum Tail {Positive, Negative, Two}; void setPopulationMean(QVariant populationMean); void setSignificanceLevel(QVariant alpha); + void setTail(Tail tail); - void performTest(Test m_test, bool categoricalVariable = true, bool equalVariance = true); + void performTest(int test, bool categoricalVariable = true, bool 
equalVariance = true); void performLeveneTest(bool categoricalVariable); QList& statisticValue(); QList& pValue(); QWidget* view() const override; + double myTest; + private: - void performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable = false, bool equalVariance = true); - void performTwoSamplePairedTest(HypothesisTest::Test::Type test); - void performOneSampleTest(HypothesisTest::Test::Type test); + void performTwoSampleIndependentTest(int test, bool categoricalVariable = false, bool equalVariance = true); + void performTwoSamplePairedTest(int test); + void performOneSampleTest(int test); void performOneWayAnova(); void performTwoWayAnova(); void m_performLeveneTest(bool categoricalVariable); - double getPValue(const HypothesisTest::Test::Type& test, double& value, + double getPValue(const int &test, double& value, const QString& col1Name, const QString& col2name, const double mean, const double sp, const int df); double m_populationMean; double m_significanceLevel; - HypothesisTest::Test::Tail m_tailType; + Tail m_tail; QList m_pValue; QList m_statisticValue; }; #endif // HypothesisTest_H diff --git a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp b/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp index a19d78df7..fade0a176 100644 --- a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp +++ b/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp @@ -1,876 +1,858 @@ /*************************************************************************** - File : HypothesisTestDock.cpp - Project : LabPlot - Description : widget for hypothesis test properties - -------------------------------------------------------------------- - Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) + File : HypothesisTestDock.cpp + Project : LabPlot + Description : widget for hypothesis test properties + -------------------------------------------------------------------- + Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) 
***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "HypothesisTestDock.h" #include "backend/core/AspectTreeModel.h" #include "backend/core/AbstractAspect.h" #include "backend/core/Project.h" #include "backend/spreadsheet/Spreadsheet.h" #include "commonfrontend/widgets/TreeViewComboBox.h" #include "kdefrontend/datasources/DatabaseManagerDialog.h" #include "kdefrontend/datasources/DatabaseManagerWidget.h" #include "kdefrontend/TemplateHandler.h" #include #include #include #include #include #include #include #include /*! \class HypothesisTestDock \brief Provides a dock (widget) for hypothesis testing: \ingroup kdefrontend */ //TODO: To add tooltips in docks for non obvious widgets. //TODO: Add functionality for database along with spreadsheet. 
HypothesisTestDock::HypothesisTestDock(QWidget* parent) : QWidget(parent) { //QDEBUG("in hypothesis test constructor "); ui.setupUi(this); ui.cbDataSourceType->addItem(i18n("Spreadsheet")); ui.cbDataSourceType->addItem(i18n("Database")); cbSpreadsheet = new TreeViewComboBox; ui.gridLayout->addWidget(cbSpreadsheet, 5, 4, 1, 3); ui.bDatabaseManager->setIcon(QIcon::fromTheme("network-server-database")); ui.bDatabaseManager->setToolTip(i18n("Manage connections")); m_configPath = QStandardPaths::standardLocations(QStandardPaths::AppDataLocation).constFirst() + "sql_connections"; // adding item to tests and testtype combo box; - ui.cbTest->addItem( i18n("T Test"), HypothesisTest::Test::Type::TTest); - ui.cbTest->addItem( i18n("Z Test"), HypothesisTest::Test::Type::ZTest); - ui.cbTest->addItem( i18n("ANOVA"), HypothesisTest::Test::Type::Anova); + ui.cbTest->addItem( i18n("T Test"), HypothesisTest::TTest); + ui.cbTest->addItem( i18n("Z Test"), HypothesisTest::ZTest); + ui.cbTest->addItem( i18n("ANOVA"), HypothesisTest::Anova); ui.lPopulationSigma->setText( UTF8_QSTRING("σ")); // making all test blocks invisible at starting. ui.pbLeveneTest->hide(); ui.lCategorical->hide(); ui.chbCategorical->hide(); ui.lCol1->hide(); ui.cbCol1->hide(); ui.lCol2->hide(); ui.cbCol2->hide(); ui.lCol3->hide(); ui.cbCol3->hide(); ui.lEqualVariance->hide(); ui.chbEqualVariance->hide(); ui.chbEqualVariance->setChecked(true); ui.lPopulationSigma->hide(); - ui.lPopulationSigma->setToolTip( i18n("Sigma of Population

" - "Hint: Z-Test if preffered over T-Test if this is known")); + ui.lPopulationSigma->setToolTip( i18n("Sigma of Population") +"

" + + i18n("Hint: Z-Test if preffered over T-Test if this is known")); ui.chbPopulationSigma->hide(); ui.lePopulationSigma->hide(); ui.pbPerformTest->setEnabled(false); ui.rbH1OneTail2->hide(); ui.rbH1OneTail1->hide(); ui.rbH1TwoTail->hide(); ui.rbH0OneTail1->hide(); ui.rbH0OneTail2->hide(); ui.rbH0TwoTail->hide(); ui.lH0->hide(); ui.lH1->hide(); QString mu = UTF8_QSTRING("μ"); QString mu0 = UTF8_QSTRING("μₒ"); // radio button for null and alternate hypothesis // for alternative hypothesis (h1) // one_tail_1 is mu > mu0; one_tail_2 is mu < mu0; two_tail = mu != mu0; ui.rbH1OneTail1->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING(">"), mu0)); ui.rbH1OneTail2->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("<"), mu0)); ui.rbH1TwoTail->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("≠"), mu0)); ui.rbH0OneTail1->setText( i18n("%1 %2 %3",mu, UTF8_QSTRING("≤"), mu0)); ui.rbH0OneTail2->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("≥"), mu0)); ui.rbH0TwoTail->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("="), mu0)); ui.rbH0TwoTail->setEnabled(false); ui.rbH0OneTail1->setEnabled(false); ui.rbH0OneTail2->setEnabled(false); // setting muo and alpha buttons ui.lMuo->setText( i18n("%1", mu0)); ui.lAlpha->setText( i18n("%1", UTF8_QSTRING("α"))); ui.leMuo->setText( i18n("%1", m_populationMean)); ui.leAlpha->setText( i18n("%1", m_significanceLevel)); ui.lMuo->hide(); ui.lMuo->setToolTip( i18n("Population Mean")); ui.lAlpha->hide(); ui.lAlpha->setToolTip( i18n("Significance Level")); ui.leMuo->hide(); ui.leAlpha->hide(); ui.pbPerformTest->setIcon(QIcon::fromTheme("run-build")); ui.leMuo->setText( i18n("%1", m_populationMean)); ui.leAlpha->setText( i18n("%1", m_significanceLevel)); // readConnections(); // auto* style = ui.bAddRow->style(); // ui.bAddRow->setIcon(style->standardIcon(QStyle::SP_ArrowRight)); // ui.bAddRow->setToolTip(i18n("Add the selected field to rows")); // ui.bRemoveRow->setIcon(style->standardIcon(QStyle::SP_ArrowLeft)); // ui.bRemoveRow->setToolTip(i18n("Remove the selected 
field from rows")); // ui.bAddColumn->setIcon(style->standardIcon(QStyle::SP_ArrowRight)); // ui.bAddColumn->setToolTip(i18n("Add the selected field to columns")); // ui.bRemoveColumn->setIcon(style->standardIcon(QStyle::SP_ArrowLeft)); // ui.bRemoveColumn->setToolTip(i18n("Remove the selected field from columns")); // //add/remove buttons only enabled if something was selected // ui.bAddRow->setEnabled(false); // ui.bRemoveRow->setEnabled(false); // ui.bAddColumn->setEnabled(false); // ui.bRemoveColumn->setEnabled(false); // connect(ui.leName, &QLineEdit::textChanged, this, &HypothesisTestDock::nameChanged); // connect(ui.leComment, &QLineEdit::textChanged, this, &HypothesisTestDock::commentChanged); connect(ui.cbDataSourceType, static_cast(&QComboBox::currentIndexChanged), - this, &HypothesisTestDock::dataSourceTypeChanged); + this, &HypothesisTestDock::dataSourceTypeChanged); connect(cbSpreadsheet, &TreeViewComboBox::currentModelIndexChanged, this, &HypothesisTestDock::spreadsheetChanged); // connect(ui.cbConnection, static_cast(&QComboBox::currentIndexChanged), // this, &HypothesisTestDock::connectionChanged); // connect(ui.cbTable, static_cast(&QComboBox::currentIndexChanged), // this, &HypothesisTestDock::tableChanged); // connect(ui.bDatabaseManager, &QPushButton::clicked, this, &HypothesisTestDock::showDatabaseManager); // connect(ui.bAddRow, &QPushButton::clicked, this, &HypothesisTestDock::addRow); // connect(ui.bRemoveRow, &QPushButton::clicked, this,&HypothesisTestDock::removeRow); // connect(ui.bAddColumn, &QPushButton::clicked, this, &HypothesisTestDock::addColumn); // connect(ui.bRemoveColumn, &QPushButton::clicked, this,&HypothesisTestDock::removeColumn); // connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::doTTest); // connect(ui.cbCol2, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::doTTest); // connect(ui.lwFields, &QListWidget::itemSelectionChanged, this, [=]() { // bool enabled 
= !ui.lwFields->selectedItems().isEmpty(); // ui.bAddRow->setEnabled(enabled); // ui.bAddColumn->setEnabled(enabled); // }); // connect(ui.lwRows, &QListWidget::doubleClicked, this,&HypothesisTestDock::removeRow); // connect(ui.lwRows, &QListWidget::itemSelectionChanged, this, [=]() { // ui.bRemoveRow->setEnabled(!ui.lwRows->selectedItems().isEmpty()); // }); // connect(ui.lwColumns, &QListWidget::doubleClicked, this,&HypothesisTestDock::removeColumn); // connect(ui.lwColumns, &QListWidget::itemSelectionChanged, this, [=]() { // ui.bRemoveColumn->setEnabled(!ui.lwColumns->selectedItems().isEmpty()); // }); connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showTestType); connect(ui.cbTestType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); // connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); // connect(ui.cbTestType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); connect(ui.pbPerformTest, &QPushButton::clicked, this, &HypothesisTestDock::doHypothesisTest); connect(ui.pbLeveneTest, &QPushButton::clicked, this, &HypothesisTestDock::performLeveneTest); //connecting null hypothesis and alternate hypothesis radio button connect(ui.rbH1OneTail1, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1OneTail1Toggled); connect(ui.rbH1OneTail2, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1OneTail2Toggled); connect(ui.rbH1TwoTail, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1TwoTailToggled); connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::col1IndexChanged); connect(ui.chbCategorical, &QCheckBox::stateChanged, this, &HypothesisTestDock::changeCbCol2Label); connect(ui.chbPopulationSigma, &QCheckBox::stateChanged, this, &HypothesisTestDock::chbPopulationSigmaStateChanged); ui.cbTest->setCurrentIndex(0); emit 
ui.cbTest->currentIndexChanged(0); ui.cbTestType->setCurrentIndex(0); emit ui.cbTestType->currentIndexChanged(0); } void HypothesisTestDock::setHypothesisTest(HypothesisTest* HypothesisTest) { //QDEBUG("in set hypothesis test"); m_initializing = true; m_hypothesisTest = HypothesisTest; m_aspectTreeModel = new AspectTreeModel(m_hypothesisTest->project()); QList list{AspectType::Folder, AspectType::Workbook, - AspectType::Spreadsheet, AspectType::LiveDataSource}; + AspectType::Spreadsheet, AspectType::LiveDataSource}; cbSpreadsheet->setTopLevelClasses(list); list = {AspectType::Spreadsheet, AspectType::LiveDataSource}; m_aspectTreeModel->setSelectableAspects(list); cbSpreadsheet->setModel(m_aspectTreeModel); //show the properties ui.leName->setText(m_hypothesisTest->name()); ui.leComment->setText(m_hypothesisTest->comment()); ui.cbDataSourceType->setCurrentIndex(m_hypothesisTest->dataSourceType()); if (m_hypothesisTest->dataSourceType() == HypothesisTest::DataSourceType::DataSourceSpreadsheet) setModelIndexFromAspect(cbSpreadsheet, m_hypothesisTest->dataSourceSpreadsheet()); // else // ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(m_hypothesisTest->dataSourceConnection())); setColumnsComboBoxModel(m_hypothesisTest->dataSourceSpreadsheet()); this->dataSourceTypeChanged(ui.cbDataSourceType->currentIndex()); //setting rows and columns in combo box; //undo functions // connect(m_hypothesisTest, SIGNAL(aspectDescriptionChanged(const AbstractAspect*)), this, SLOT(hypothesisTestDescriptionChanged(const AbstractAspect*))); m_initializing = false; } void HypothesisTestDock::showTestType() { - //QDEBUG("in show test type"); - m_test.type = HypothesisTest::Test::Type(ui.cbTest->currentData().toInt()); + m_test = ui.cbTest->currentData().toInt(); ui.cbTestType->clear(); - if (m_test.type & (HypothesisTest::Test::Type::TTest | HypothesisTest::Test::Type::ZTest)) { - ui.cbTestType->addItem( i18n("Two Sample Independent"), 
HypothesisTest::Test::SubType::TwoSampleIndependent); - ui.cbTestType->addItem( i18n("Two Sample Paired"), HypothesisTest::Test::SubType::TwoSamplePaired); - ui.cbTestType->addItem( i18n("One Sample"), HypothesisTest::Test::SubType::OneSample); - } else if (m_test.type & HypothesisTest::Test::Type::Anova) { - ui.cbTestType->addItem( i18n("One Way"), HypothesisTest::Test::SubType::OneWay); - ui.cbTestType->addItem( i18n("Two Way"), HypothesisTest::Test::SubType::TwoWay); + + if (testType(m_test) == HypothesisTest::TTest || + testType(m_test) == HypothesisTest::ZTest) { + ui.cbTestType->addItem( i18n("Two Sample Independent"), HypothesisTest::TwoSampleIndependent); + ui.cbTestType->addItem( i18n("Two Sample Paired"), HypothesisTest::TwoSamplePaired); + ui.cbTestType->addItem( i18n("One Sample"), HypothesisTest::OneSample); + } else if (testType(m_test) == HypothesisTest::Anova) { + ui.cbTestType->addItem( i18n("One Way"), HypothesisTest::OneWay); + ui.cbTestType->addItem( i18n("Two Way"), HypothesisTest::TwoWay); } } void HypothesisTestDock::showHypothesisTest() { //QDEBUG("in showHypothesisTest"); if (ui.cbTestType->count() == 0) return; - m_test.subtype = HypothesisTest::Test::SubType(ui.cbTestType->currentData().toInt()); + m_test |= ui.cbTestType->currentData().toInt(); ui.lCol1->show(); ui.cbCol1->show(); - ui.lCol2->setVisible(bool(m_test.subtype & (~HypothesisTest::Test::SubType::OneSample))); - ui.cbCol2->setVisible(bool(m_test.subtype & (~HypothesisTest::Test::SubType::OneSample))); - - ui.lCol3->setVisible(bool(m_test.type & (HypothesisTest::Test::Anova) & - setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoWay))); - ui.cbCol3->setVisible(bool(m_test.type & (HypothesisTest::Test::Anova) & - setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoWay))); + ui.lCol2->setVisible(testSubtype(m_test) != HypothesisTest::OneSample); + ui.cbCol2->setVisible(testSubtype(m_test) != HypothesisTest::OneSample); - ui.lEqualVariance->setVisible(bool( 
(m_test.type & HypothesisTest::Test::Type::TTest) & - (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); - ui.chbEqualVariance->setVisible(bool( (m_test.type & HypothesisTest::Test::Type::TTest) & - (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); + ui.lCol3->setVisible(m_test == (HypothesisTest::Anova | HypothesisTest::TwoWay)); + ui.cbCol3->setVisible(m_test == (HypothesisTest::Anova | HypothesisTest::TwoWay)); - ui.lCategorical->setVisible(bool((m_test.type & HypothesisTest::Test::Type::TTest) & - (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); - ui.chbCategorical->setVisible(bool((m_test.type & HypothesisTest::Test::Type::TTest) & - (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); + ui.lEqualVariance->setVisible(m_test == (HypothesisTest::TTest | HypothesisTest::TwoSampleIndependent)); + ui.chbEqualVariance->setVisible(m_test == (HypothesisTest::TTest | HypothesisTest::TwoSampleIndependent)); ui.chbEqualVariance->setChecked(true); - ui.lPopulationSigma->setVisible(bool((m_test.type & (HypothesisTest::Test::Type::TTest | - HypothesisTest::Test::Type::ZTest)) & - ~(setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneSample)))); + ui.lCategorical->setVisible(m_test == (HypothesisTest::TTest | HypothesisTest::TwoSampleIndependent)); + ui.chbCategorical->setVisible(m_test == (HypothesisTest::TTest | HypothesisTest::TwoSampleIndependent)); + - ui.chbPopulationSigma->setVisible(bool((m_test.type & (HypothesisTest::Test::Type::TTest | - HypothesisTest::Test::Type::ZTest)) & - ~(setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneSample)))); + ui.lPopulationSigma->setVisible(m_test == (HypothesisTest::TTest | HypothesisTest::OneSample) || + m_test == (HypothesisTest::ZTest | HypothesisTest::OneSample)); + ui.chbPopulationSigma->setVisible(m_test == (HypothesisTest::TTest | HypothesisTest::OneSample) || + m_test == (HypothesisTest::ZTest | 
HypothesisTest::OneSample)); ui.chbPopulationSigma->setChecked(false); - ui.pbLeveneTest->setVisible(bool((m_test.type & HypothesisTest::Test::Type::Anova & - setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneWay)) | - (HypothesisTest::Test::Type::TTest & - setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent)))); + ui.pbLeveneTest->setVisible(m_test == (HypothesisTest::Anova | HypothesisTest::OneWay) || + m_test == (HypothesisTest::TTest | HypothesisTest::TwoSampleIndependent)); - ui.lH1->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH1OneTail1->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH1OneTail2->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH1TwoTail->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); + ui.lH1->setVisible(testType(m_test) != HypothesisTest::Anova); + ui.rbH1OneTail1->setVisible(testType(m_test) != HypothesisTest::Anova); + ui.rbH1OneTail2->setVisible(testType(m_test) != HypothesisTest::Anova); + ui.rbH1TwoTail->setVisible(testType(m_test) != HypothesisTest::Anova); - ui.lH0->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH0OneTail1->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH0OneTail2->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); - ui.rbH0TwoTail->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); + ui.lH0->setVisible(testType(m_test) != HypothesisTest::Anova); + ui.rbH0OneTail1->setVisible(testType(m_test) != HypothesisTest::Anova); + ui.rbH0OneTail2->setVisible(testType(m_test) != HypothesisTest::Anova); + ui.rbH0TwoTail->setVisible(testType(m_test) != HypothesisTest::Anova); ui.rbH1TwoTail->setChecked(true); - ui.lMuo->setVisible(bool(m_test.subtype & HypothesisTest::Test::SubType::OneSample)); - ui.leMuo->setVisible(bool(ui.lMuo->isVisible())); + ui.lMuo->setVisible(testSubtype(m_test) == 
HypothesisTest::OneSample); + ui.leMuo->setVisible(testSubtype(m_test) == HypothesisTest::OneSample); ui.lAlpha->show(); ui.leAlpha->show(); setColumnsComboBoxView(); ui.pbPerformTest->setEnabled(nonEmptySelectedColumns()); ui.pbLeveneTest->setEnabled(nonEmptySelectedColumns()); } void HypothesisTestDock::doHypothesisTest() { //QDEBUG("in doHypothesisTest"); + m_hypothesisTest->setTail(m_tail); m_hypothesisTest->setPopulationMean(ui.leMuo->text()); m_hypothesisTest->setSignificanceLevel(ui.leAlpha->text()); QVector cols; if (ui.cbCol1->count() == 0) return; cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong()); - if (m_test.subtype & HypothesisTest::Test::SubType::TwoWay) + if (testSubtype(m_test) == HypothesisTest::TwoWay) cols << reinterpret_cast(ui.cbCol3->currentData().toLongLong()); - if (m_test.subtype & (~HypothesisTest::Test::SubType::OneSample)) + if (testSubtype(m_test) != HypothesisTest::OneSample) if (ui.cbCol2->count() > 0) cols << reinterpret_cast(ui.cbCol2->currentData().toLongLong()); m_hypothesisTest->setColumns(cols); m_hypothesisTest->performTest(m_test, ui.chbCategorical->isChecked(), ui.chbEqualVariance->isChecked()); } void HypothesisTestDock::performLeveneTest() { QVector cols; if (ui.cbCol1->count() == 0 || ui.cbCol2->count() == 0) return; cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong()); cols << reinterpret_cast(ui.cbCol2->currentData().toLongLong()); m_hypothesisTest->setColumns(cols); m_hypothesisTest->setSignificanceLevel(ui.leAlpha->text()); m_hypothesisTest->performLeveneTest(ui.chbCategorical->isChecked()); } void HypothesisTestDock::setModelIndexFromAspect(TreeViewComboBox* cb, const AbstractAspect* aspect) { if (aspect) cb->setCurrentModelIndex(m_aspectTreeModel->modelIndexOfAspect(aspect)); else cb->setCurrentModelIndex(QModelIndex()); } ///*! // shows the database manager where the connections are created and edited. // The selected connection is selected in the connection combo box in this widget. 
//**/ //void HypothesisTestDock::showDatabaseManager() { // DatabaseManagerDialog* dlg = new DatabaseManagerDialog(this, ui.cbConnection->currentText()); // if (dlg->exec() == QDialog::Accepted) { // //re-read the available connections to be in sync with the changes in DatabaseManager // m_initializing = true; // ui.cbConnection->clear(); // readConnections(); // //select the connection the user has selected in DatabaseManager // const QString& conn = dlg->connection(); // ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(conn)); // m_initializing = false; // connectionChanged(); // } // delete dlg; //} ///*! // loads all available saved connections //*/ //void HypothesisTestDock::readConnections() { // DEBUG("ImportSQLDatabaseWidget: reading available connections"); // KConfig config(m_configPath, KConfig::SimpleConfig); // for (const auto& name : config.groupList()) // ui.cbConnection->addItem(name); //} ///*! // * adds the selected field to the rows // */ //void HypothesisTestDock::addRow() { // QString field = ui.lwFields->currentItem()->text(); // ui.lwRows->addItem(field); // ui.lwFields->takeItem(ui.lwFields->currentRow()); // m_hypothesisTest->addToRows(field); //} ///*! // * removes the selected field from the rows // */ //void HypothesisTestDock::removeRow() { // const QString& field = ui.lwRows->currentItem()->text(); // ui.lwRows->takeItem(ui.lwRows->currentRow()); // m_hypothesisTest->removeFromRows(field); // updateFields(); //} ///*! // * adds the selected field to the columns // */ //void HypothesisTestDock::addColumn() { // QString field = ui.lwFields->currentItem()->text(); // ui.lwColumns->addItem(field); // ui.lwFields->takeItem(ui.lwFields->currentRow()); // m_hypothesisTest->addToColumns(field); //} ///*! 
// * removes the selected field from the columns // */ //void HypothesisTestDock::removeColumn() { // const QString& field = ui.lwColumns->currentItem()->text(); // ui.lwColumns->takeItem(ui.lwColumns->currentRow()); // m_hypothesisTest->removeFromColumns(field); // updateFields(); //} ///*! // * re-populates the content of the "Fields" list widget by adding the non-selected fields only. // * called when a selected field is removed from rows or columns. // */ //void HypothesisTestDock::updateFields() { // ui.lwFields->clear(); // for (auto dimension : m_hypothesisTest->dimensions()) // if (!fieldSelected(dimension)) // ui.lwFields->addItem(new QListWidgetItem(QIcon::fromTheme("draw-text"), dimension)); // for (auto measure : m_hypothesisTest->measures()) // if (!fieldSelected(measure)) // ui.lwFields->addItem(new QListWidgetItem(measure)); //} ///*! // * return \c true if the field name \c field was selected among rows or columns, // * return \c false otherwise. // * */ //bool HypothesisTestDock::fieldSelected(const QString& field) { // for (int i = 0; icount(); ++i) // if (ui.lwRows->item(i)->text() == field) // return true; // for (int i = 0; icount(); ++i) // if (ui.lwColumns->item(i)->text() == field) // return true; // return false; //} ////************************************************************* ////****** SLOTs for changes triggered in HypothesisTestDock ******* ////************************************************************* //void HypothesisTestDock::nameChanged() { // if (m_initializing) // return; // m_hypothesisTest->setName(ui.leName->text()); //} //void HypothesisTestDock::commentChanged() { // if (m_initializing) // return; // m_hypothesisTest->setComment(ui.leComment->text()); //} void HypothesisTestDock::dataSourceTypeChanged(int index) { //QDEBUG("in dataSourceTypeChanged"); HypothesisTest::DataSourceType type = static_cast(index); bool showDatabase = (type == HypothesisTest::DataSourceType::DataSourceDatabase); 
ui.lSpreadsheet->setVisible(!showDatabase); cbSpreadsheet->setVisible(!showDatabase); ui.lConnection->setVisible(showDatabase); ui.cbConnection->setVisible(showDatabase); ui.bDatabaseManager->setVisible(showDatabase); ui.lTable->setVisible(showDatabase); ui.cbTable->setVisible(showDatabase); if (m_initializing) return; m_hypothesisTest->setComment(ui.leComment->text()); } void HypothesisTestDock::spreadsheetChanged(const QModelIndex& index) { //QDEBUG("in spreadsheetChanged"); auto* aspect = static_cast(index.internalPointer()); Spreadsheet* spreadsheet = dynamic_cast(aspect); setColumnsComboBoxModel(spreadsheet); m_hypothesisTest->setDataSourceSpreadsheet(spreadsheet); } void HypothesisTestDock::changeCbCol2Label() { //QDEBUG("in changeCbCol2Label"); - if ( (m_test.type & ~HypothesisTest::Test::Type::Anova) & (m_test.subtype & ~HypothesisTest::Test::SubType::TwoSampleIndependent)) { + if (testType(m_test) != HypothesisTest::Anova || + testSubtype(m_test) != HypothesisTest::TwoSampleIndependent) { ui.lCol2->setText( i18n("Independent Var. 2")); return; } if (ui.cbCol1->count() == 0) return; QString selected_text = ui.cbCol1->currentText(); Column* col1 = m_hypothesisTest->dataSourceSpreadsheet()->column(selected_text); if (!ui.chbCategorical->isChecked() && (col1->columnMode() == AbstractColumn::Integer || col1->columnMode() == AbstractColumn::Numeric)) { ui.lCol2->setText( i18n("Independent Var. 2")); ui.chbCategorical->setChecked(false); ui.chbCategorical->setEnabled(true); } else { ui.lCol2->setText( i18n("Dependent Var. 
1")); if (!ui.chbCategorical->isChecked()) ui.chbCategorical->setEnabled(false); else ui.chbCategorical->setEnabled(true); ui.chbCategorical->setChecked(true); } } void HypothesisTestDock::chbPopulationSigmaStateChanged() { if (ui.chbPopulationSigma->isVisible() && ui.chbPopulationSigma->isChecked()) ui.lePopulationSigma->show(); else ui.lePopulationSigma->hide(); } void HypothesisTestDock::col1IndexChanged(int index) { if (index < 0) return; changeCbCol2Label(); } //void HypothesisTestDock::connectionChanged() { // if (ui.cbConnection->currentIndex() == -1) { // ui.lTable->hide(); // ui.cbTable->hide(); // return; // } // //clear the previously shown tables // ui.cbTable->clear(); // ui.lTable->show(); // ui.cbTable->show(); // const QString& connection = ui.cbConnection->currentText(); // //connection name was changed, determine the current connections settings // KConfig config(m_configPath, KConfig::SimpleConfig); // KConfigGroup group = config.group(connection); // //close and remove the previos connection, if available // if (m_db.isOpen()) { // m_db.close(); // QSqlDatabase::removeDatabase(m_db.driverName()); // } // //open the selected connection // //QDEBUG("HypothesisTestDock: connecting to " + connection); // const QString& driver = group.readEntry("Driver"); // m_db = QSqlDatabase::addDatabase(driver); // const QString& dbName = group.readEntry("DatabaseName"); // if (DatabaseManagerWidget::isFileDB(driver)) { // if (!QFile::exists(dbName)) { // KMessageBox::error(this, i18n("Couldn't find the database file '%1'. 
Please check the connection settings.", dbName), // appendRow i18n("Connection Failed")); // return; // } else // m_db.setDatabaseName(dbName); // } else if (DatabaseManagerWidget::isODBC(driver)) { // if (group.readEntry("CustomConnectionEnabled", false)) // m_db.setDatabaseName(group.readEntry("CustomConnectionString")); // else // m_db.setDatabaseName(dbName); // } else { // m_db.setDatabaseName(dbName); // m_db.setHostName( group.readEntry("HostName") ); // m_db.setPort( group.readEntry("Port", 0) ); // m_db.setUserName( group.readEntry("UserName") ); // m_db.setPassword( group.readEntry("Password") ); // } // WAIT_CURSOR; // if (!m_db.open()) { // RESET_CURSOR; // KMessageBox::error(this, i18n("Failed to connect to the database '%1'. Please check the connection settings.", ui.cbConnection->currentText()) + // QLatin1String("\n\n") + m_db.lastError().databaseText(), // i18n("Connection Failed")); // return; // } // //show all available database tables // if (m_db.tables().size()) { // for (auto table : m_db.tables()) // ui.cbTable->addItem(QIcon::fromTheme("view-form-table"), table); // ui.cbTable->setCurrentIndex(0); // } // RESET_CURSOR; // if (m_initializing) // return; //// m_hypothesisTest->setDataSourceConnection(connection); //} //void HypothesisTestDock::tableChanged() { // const QString& table = ui.cbTable->currentText(); // //show all attributes of the selected table //// for (const auto* col : spreadsheet->children()) { //// QListWidgetItem* item = new QListWidgetItem(col->icon(), col->name()); //// ui.lwFields->addItem(item); //// } // if (m_initializing) // return; //// m_hypothesisTest->setDataSourceTable(table); //} ////************************************************************* ////******** SLOTs for changes triggered in Spreadsheet ********* ////************************************************************* void HypothesisTestDock::hypothesisTestDescriptionChanged(const AbstractAspect* aspect) { //QDEBUG("in hypothesisTestDescriptionChanged"); 
if (m_hypothesisTest != aspect) return; m_initializing = true; if (aspect->name() != ui.leName->text()) ui.leName->setText(aspect->name()); else if (aspect->comment() != ui.leComment->text()) ui.leComment->setText(aspect->comment()); m_initializing = false; } ////************************************************************* ////******************** SETTINGS ******************************* ////************************************************************* //void HypothesisTestDock::load() { //} //void HypothesisTestDock::loadConfigFromTemplate(KConfig& config) { // Q_UNUSED(config); //} ///*! // loads saved matrix properties from \c config. // */ //void HypothesisTestDock::loadConfig(KConfig& config) { // Q_UNUSED(config); //} ///*! // saves matrix properties to \c config. // */ //void HypothesisTestDock::saveConfigAsTemplate(KConfig& config) { // Q_UNUSED(config); //} //TODO: Rather than inbuilt slots use own decided slots for checked rather than clicked // for alternate hypothesis // one_tail_1 is mu > mu0; one_tail_2 is mu < mu0; two_tail = mu != mu0; void HypothesisTestDock::onRbH1OneTail1Toggled(bool checked) { if (!checked) return; ui.rbH0OneTail1->setChecked(true); - m_test.tail = HypothesisTest::Test::Tail::Positive; + m_tail = HypothesisTest::Positive; } void HypothesisTestDock::onRbH1OneTail2Toggled(bool checked) { if (!checked) return; ui.rbH0OneTail2->setChecked(true); - m_test.tail = HypothesisTest::Test::Tail::Negative; - + m_tail = HypothesisTest::Negative; } void HypothesisTestDock::onRbH1TwoTailToggled(bool checked) { if (!checked) return; ui.rbH0TwoTail->setChecked(true); - m_test.tail = HypothesisTest::Test::Tail::Two; + m_tail = HypothesisTest::Two; } - /**************************************Helper Functions********************************************/ void HypothesisTestDock::countPartitions(Column *column, int &np, int &total_rows) { total_rows = column->rowCount(); np = 0; QString cell_value; QMap discovered_categorical_var; 
AbstractColumn::ColumnMode original_col_mode = column->columnMode(); column->setColumnMode(AbstractColumn::Text); for (int i = 0; i < total_rows; i++) { cell_value = column->textAt(i); if (cell_value.isEmpty()) { total_rows = i; break; } if (discovered_categorical_var[cell_value]) continue; discovered_categorical_var[cell_value] = true; np++; } column->setColumnMode(original_col_mode); } void HypothesisTestDock::setColumnsComboBoxModel(Spreadsheet* spreadsheet) { m_onlyValuesCols.clear(); m_twoCategoricalCols.clear(); m_multiCategoricalCols.clear(); for (auto* col : spreadsheet->children()) { if (col->columnMode() == AbstractColumn::Integer || col->columnMode() == AbstractColumn::Numeric) m_onlyValuesCols.append(col); else { int np = 0, n_rows = 0; countPartitions(col, np, n_rows); if (np <= 1) continue; else if (np == 2) m_twoCategoricalCols.append(col); else m_multiCategoricalCols.append(col); } } setColumnsComboBoxView(); showHypothesisTest(); } //TODO: change from if else to switch case: void HypothesisTestDock::setColumnsComboBoxView() { ui.cbCol1->clear(); ui.cbCol2->clear(); ui.cbCol3->clear(); QList::iterator i; - switch (m_test.type) { - case (HypothesisTest::Test::Type::ZTest): - case (HypothesisTest::Test::Type::TTest): { - switch (m_test.subtype) { - case (HypothesisTest::Test::SubType::TwoSampleIndependent): { - for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - ui.cbCol2->addItem( (*i)->name(), qint64(*i)); - } - for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - break; - } - case (HypothesisTest::Test::SubType::TwoSamplePaired): { - for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - ui.cbCol2->addItem( (*i)->name(), qint64(*i)); - } - break; - } - case (HypothesisTest::Test::SubType::OneSample): { - for (i = m_onlyValuesCols.begin(); i 
!= m_onlyValuesCols.end(); i++) - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - break; - } - case HypothesisTest::Test::SubType::OneWay: - case HypothesisTest::Test::SubType::TwoWay: - case HypothesisTest::Test::SubType::NoneSubType: - break; + switch (testType(m_test)) { + case (HypothesisTest::ZTest): + case (HypothesisTest::TTest): { + switch (testSubtype(m_test)) { + case (HypothesisTest::TwoSampleIndependent): { + for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { + ui.cbCol1->addItem( (*i)->name(), qint64(*i)); + ui.cbCol2->addItem( (*i)->name(), qint64(*i)); } + for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) + ui.cbCol1->addItem( (*i)->name(), qint64(*i)); break; } - case HypothesisTest::Test::Type::Anova: { - switch (m_test.subtype) { - case HypothesisTest::Test::SubType::OneWay: { - for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) - ui.cbCol2->addItem( (*i)->name(), qint64(*i)); - for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - for (i = m_multiCategoricalCols.begin(); i != m_multiCategoricalCols.end(); i++) - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - break; - } - case HypothesisTest::Test::SubType::TwoWay: { - for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) - ui.cbCol2->addItem( (*i)->name(), qint64(*i)); - - for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) { - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - ui.cbCol3->addItem( (*i)->name(), qint64(*i)); - } - for (i = m_multiCategoricalCols.begin(); i != m_multiCategoricalCols.end(); i++) { - ui.cbCol1->addItem( (*i)->name(), qint64(*i)); - ui.cbCol3->addItem( (*i)->name(), qint64(*i)); - } - break; - } - case HypothesisTest::Test::SubType::TwoSampleIndependent: - case HypothesisTest::Test::SubType::TwoSamplePaired: - case HypothesisTest::Test::SubType::OneSample: - case 
HypothesisTest::Test::SubType::NoneSubType: - break; + case (HypothesisTest::TwoSamplePaired): { + for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { + ui.cbCol1->addItem( (*i)->name(), qint64(*i)); + ui.cbCol2->addItem( (*i)->name(), qint64(*i)); } break; } - case HypothesisTest::Test::Type::NoneType: + case (HypothesisTest::OneSample): { + for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) + ui.cbCol1->addItem( (*i)->name(), qint64(*i)); + break; + } + } break; } + case HypothesisTest::Anova: { + switch (testSubtype(m_test)) { + case HypothesisTest::OneWay: { + for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) + ui.cbCol2->addItem( (*i)->name(), qint64(*i)); + for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) + ui.cbCol1->addItem( (*i)->name(), qint64(*i)); + for (i = m_multiCategoricalCols.begin(); i != m_multiCategoricalCols.end(); i++) + ui.cbCol1->addItem( (*i)->name(), qint64(*i)); + break; + } + case HypothesisTest::TwoWay: { + for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) + ui.cbCol2->addItem( (*i)->name(), qint64(*i)); + + for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) { + ui.cbCol1->addItem( (*i)->name(), qint64(*i)); + ui.cbCol3->addItem( (*i)->name(), qint64(*i)); + } + for (i = m_multiCategoricalCols.begin(); i != m_multiCategoricalCols.end(); i++) { + ui.cbCol1->addItem( (*i)->name(), qint64(*i)); + ui.cbCol3->addItem( (*i)->name(), qint64(*i)); + } + break; + } + } + break; + } + } } bool HypothesisTestDock::nonEmptySelectedColumns() { if ((ui.cbCol1->isVisible() && ui.cbCol1->count() < 1) || - (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1) || - (ui.cbCol3->isVisible() && ui.cbCol3->count() < 1)) + (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1) || + (ui.cbCol3->isVisible() && ui.cbCol3->count() < 1)) return false; return true; } -uint8_t HypothesisTestDock::setAllBits(const uint8_t& bits) { - if (!bits) - 
return 0; - return ~(bits & (bits-1)); +int HypothesisTestDock::testType(int test) { + return test & 0x0F; +} + +int HypothesisTestDock::testSubtype(int test) { + return test & 0xF0; } diff --git a/src/kdefrontend/dockwidgets/HypothesisTestDock.h b/src/kdefrontend/dockwidgets/HypothesisTestDock.h index d7a6f1d21..7f63f960a 100644 --- a/src/kdefrontend/dockwidgets/HypothesisTestDock.h +++ b/src/kdefrontend/dockwidgets/HypothesisTestDock.h @@ -1,119 +1,122 @@ /*************************************************************************** File : HypothesisTestDock.h Project : LabPlot Description : widget for hypothesis testing properties -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef HYPOTHESISTESTDOCK_H #define HYPOTHESISTESTDOCK_H #include "backend/generalTest/HypothesisTest.h" #include "ui_hypothesistestdock.h" #include class AbstractAspect; class AspectTreeModel; class HypothesisTest; class TreeViewComboBox; class KConfig; class QScrollArea; class QStandardItemModel; class QStandardItem; class QComboBox; class HypothesisTestDock : public QWidget { Q_OBJECT public: explicit HypothesisTestDock(QWidget*); void setHypothesisTest(HypothesisTest*); private: Ui::HypothesisTestDock ui; bool m_initializing{false}; TreeViewComboBox* cbSpreadsheet{nullptr}; HypothesisTest* m_hypothesisTest{nullptr}; AspectTreeModel* m_aspectTreeModel{nullptr}; QSqlDatabase m_db; QString m_configPath; double m_populationMean{0}; double m_significanceLevel{0.05}; // void load(); // void loadConfig(KConfig&); void setModelIndexFromAspect(TreeViewComboBox*, const AbstractAspect*); // void readConnections(); // void updateFields(); // bool fieldSelected(const QString&); - HypothesisTest::Test m_test; + int m_test; + HypothesisTest::Tail m_tail; QScrollArea* scrollDock; void countPartitions(Column *column, int &np, int &total_rows); void setColumnsComboBoxModel(Spreadsheet* spreadsheet); void setColumnsComboBoxView(); bool nonEmptySelectedColumns(); - std::uint8_t setAllBits(const std::uint8_t& bits); + + int testType(int test); + int testSubtype(int test); QList m_onlyValuesCols; QList m_twoCategoricalCols; QList m_multiCategoricalCols; private slots: //SLOTs for changes triggered in PivotTableDock // void nameChanged(); // void commentChanged(); void dataSourceTypeChanged(int); void showTestType(); void showHypothesisTest(); void doHypothesisTest(); void 
performLeveneTest(); void spreadsheetChanged(const QModelIndex&); void changeCbCol2Label(); void chbPopulationSigmaStateChanged(); void col1IndexChanged(int index); void onRbH1OneTail1Toggled(bool checked); void onRbH1OneTail2Toggled(bool checked); void onRbH1TwoTailToggled(bool checked); // void connectionChanged(); // void tableChanged(); // void showDatabaseManager(); // //SLOTs for changes triggered in PivotTable void hypothesisTestDescriptionChanged(const AbstractAspect*); // void addRow(); // void removeRow(); // void addColumn(); // void removeColumn(); // //save/load template // void loadConfigFromTemplate(KConfig&); // void saveConfigAsTemplate(KConfig&); signals: // void info(const QString&); }; #endif // PIVOTTABLEDOCK_H diff --git a/tests/stats/anova/AnovaTest.cpp b/tests/stats/anova/AnovaTest.cpp index 70444d23c..bb0d6e215 100644 --- a/tests/stats/anova/AnovaTest.cpp +++ b/tests/stats/anova/AnovaTest.cpp @@ -1,185 +1,185 @@ /*************************************************************************** - File : AnovaTest.cpp + File : AnovaTest.cpp Project : LabPlot Description : Tests for data correlation -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal (agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "AnovaTest.h" #include "backend/generalTest/HypothesisTest.h" #include "backend/core/AbstractColumn.h" #include "backend/core/column/Column.h" void AnovaTest::oneWayAnova_data() { QTest::addColumn>("col1Data"); QTest::addColumn>("col2Data"); QTest::addColumn("fValue_expected"); QTest::addColumn("pValue_expected"); // First Sample QVector col1Data = {"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", - "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", - "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", - "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", - "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5" - }; + "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", + "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", + "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", + "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5" + }; QVector col2Data = {1, 43, 15, 40, 8, 17, 30, 34, 34, 26, 1, 7, 22, 30, 40, 15, 20, 9, 14, 15, - 6, 30, 15, 30, 12, 17, 21, 23, 20, 27, -19, -18, -8, -6, -6, -9, -17, -12, -11, -6, - 5, 8, 12, 19, 8, 15, 21, 28, 26, 27, -10, 6, 4, 3, 0, 4, 9, -5, 7, 13, - 38, 20, 20, 28, 11, 17, 15, 27, 24, 23, 28, 26, 34, 32, 24, 29, 30, 24, 34, 23, - -5, -12, -15, -4, -2, -6, -2, -7, -10, -15, -13, -16, -23, -22, -9, 
-18, -17, -15, -14, -15 - }; + 6, 30, 15, 30, 12, 17, 21, 23, 20, 27, -19, -18, -8, -6, -6, -9, -17, -12, -11, -6, + 5, 8, 12, 19, 8, 15, 21, 28, 26, 27, -10, 6, 4, 3, 0, 4, 9, -5, 7, 13, + 38, 20, 20, 28, 11, 17, 15, 27, 24, 23, 28, 26, 34, 32, 24, 29, 30, 24, 34, 23, + -5, -12, -15, -4, -2, -6, -2, -7, -10, -15, -13, -16, -23, -22, -9, -18, -17, -15, -14, -15 + }; double fValue_expected = 33.1288915411; double pValue_expected = 0; QTest::newRow("First Sample") << col1Data << col2Data << fValue_expected << pValue_expected; } void AnovaTest::oneWayAnova() { QFETCH(QVector, col1Data); QFETCH(QVector, col2Data); QFETCH(double, fValue_expected); QFETCH(double, pValue_expected); Column* col1 = new Column("col1", AbstractColumn::Text); Column* col2 = new Column("col2", AbstractColumn::Numeric); col1->replaceTexts(0, col1Data); col2->replaceValues(0, col2Data); QVector cols; cols << col1 << col2; HypothesisTest anovaTest("One Way Anova"); anovaTest.setColumns(cols); - HypothesisTest::Test test; - test.type = HypothesisTest::Test::Type::Anova; - test.subtype = HypothesisTest::Test::SubType::OneWay; - test.tail = HypothesisTest::Test::Tail::Two; + int test; + test = HypothesisTest::Anova; + test |= HypothesisTest::OneWay; + anovaTest.setTail(HypothesisTest::Two); bool categoricalVariable = true; bool equalVariance = true; anovaTest.performTest(test, categoricalVariable, equalVariance); double fValue = anovaTest.statisticValue()[0]; double pValue = anovaTest.pValue()[0]; QDEBUG("fValue is " << fValue); QDEBUG("pValue is: " << pValue); QDEBUG("fValue_expected is " << fValue_expected); QDEBUG("pValue_expected is: " << pValue_expected); FuzzyCompare(fValue, fValue_expected, 1.e-5); FuzzyCompare(pValue, pValue_expected, 1.e-5); } void AnovaTest::twoWayAnova_data() { QTest::addColumn>("col1Data"); QTest::addColumn>("col2Data"); QTest::addColumn>("col3Data"); QTest::addColumn("fCol1Value_expected"); QTest::addColumn("fCol2Value_expected"); 
QTest::addColumn("fInteractionValue_expected"); QTest::addColumn("pCol1Value_expected"); QTest::addColumn("pCol2Value_expected"); // First Sample // This data set is taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf QVector col1Data = {"Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Super", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best", "Best"}; QVector col2Data = {"cold", "cold", "cold", "cold", "warm", "warm", "warm", "warm", "hot", "hot", "hot", "hot", "cold", "cold", "cold", "cold", "warm", "warm", "warm", "warm", "hot", "hot", "hot", "hot"}; QVector col3Data = {4, 5, 6, 5, 7, 9, 8, 12, 10, 12, 11, 9, 6, 6, 4, 4, 13, 15, 12, 12, 12, 13, 10, 13}; double fCol1Value_expected = 9.80885214008; double fCol2Value_expected = 48.7193579767; double fInteractionValue_expected = 3.97227626459; double pCol1Value_expected = 0.005758; double pCol2Value_expected = 5.44e-08; -// double pInteractionValue_expected = 0.037224; + // double pInteractionValue_expected = 0.037224; QTest::newRow("detergent vs temperature") << col1Data << col2Data << col3Data << - fCol1Value_expected << fCol2Value_expected << fInteractionValue_expected << - pCol1Value_expected << pCol2Value_expected; + fCol1Value_expected << fCol2Value_expected << fInteractionValue_expected << + pCol1Value_expected << pCol2Value_expected; } //TODO: check for pValue. 
In document probabilty is Pr(>F) void AnovaTest::twoWayAnova() { QFETCH(QVector, col1Data); QFETCH(QVector, col2Data); QFETCH(QVector, col3Data); QFETCH(double, fCol1Value_expected); QFETCH(double, fCol2Value_expected); QFETCH(double, fInteractionValue_expected); QFETCH(double, pCol1Value_expected); QFETCH(double, pCol2Value_expected); Column* col1 = new Column("col1", AbstractColumn::Text); Column* col2 = new Column("col2", AbstractColumn::Text); Column* col3 = new Column("col3", AbstractColumn::Numeric); col1->replaceTexts(0, col1Data); col2->replaceTexts(0, col2Data); col3->replaceValues(0, col3Data); QVector cols; cols << col1 << col2 << col3; HypothesisTest anovaTest("Two Way Anova"); anovaTest.setColumns(cols); - HypothesisTest::Test test; - test.type = HypothesisTest::Test::Type::Anova; - test.subtype = HypothesisTest::Test::SubType::TwoWay; - test.tail = HypothesisTest::Test::Tail::Two; + int test; + test = HypothesisTest::Anova; + test |= HypothesisTest::TwoWay; + anovaTest.setTail(HypothesisTest::Two); anovaTest.performTest(test); double fCol1Value = anovaTest.statisticValue()[0]; double fCol2Value = anovaTest.statisticValue()[1]; double fInteractionValue = anovaTest.statisticValue()[2]; double pCol1Value = anovaTest.pValue()[0]; double pCol2Value = anovaTest.pValue()[1]; QDEBUG("size of statistic value is " << anovaTest.statisticValue().size()); QDEBUG("fCol1Value is " << fCol1Value); QDEBUG("fCol1Value_expected is " << fCol1Value_expected); QDEBUG("fCol2Value is " << fCol2Value); QDEBUG("fCol2Value_expected is " << fCol2Value_expected); QDEBUG("fInteractionValue is " << fInteractionValue); QDEBUG("fInteractionValue_expected is " << fInteractionValue_expected); QDEBUG("pCol1Value is " << pCol1Value); QDEBUG("pCol1Value_expected is " << pCol1Value_expected); QDEBUG("pCol2Value is " << pCol2Value); QDEBUG("pCol2Value_expected is " << pCol2Value_expected); FuzzyCompare(fCol1Value, fCol1Value_expected, 1.e-5); FuzzyCompare(fCol2Value, fCol2Value_expected, 
1.e-5); FuzzyCompare(fInteractionValue, fInteractionValue_expected, 1.e-5); FuzzyCompare(pCol1Value, pCol1Value_expected, 1.e-5); FuzzyCompare(pCol2Value, pCol2Value_expected, 1.e-5); } QTEST_MAIN(AnovaTest) diff --git a/tests/stats/ttest/TTestTest.cpp b/tests/stats/ttest/TTestTest.cpp index 807672d9c..5fe45a39c 100644 --- a/tests/stats/ttest/TTestTest.cpp +++ b/tests/stats/ttest/TTestTest.cpp @@ -1,202 +1,202 @@ /*************************************************************************** File : CorrelationTest.cpp Project : LabPlot Description : Tests for data correlation -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal (agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "TTestTest.h" #include "backend/generalTest/HypothesisTest.h" #include "backend/core/AbstractColumn.h" #include "backend/core/column/Column.h" //TODO: Decrease relative errors and increase more floating points for expected values. 
void TTestTest::twoSampleIndependent_data() {
	QTest::addColumn<QVector<double>>("col1Data");
	QTest::addColumn<QVector<double>>("col2Data");
	QTest::addColumn<double>("tValue_expected");
	QTest::addColumn<double>("pValue_expected");

	// First Sample
	// This data set is taken from "JASP"
	QVector<double> col1Data = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
	QVector<double> col2Data = {3, 1, 5, 4, 6, 4, 6, 2, 0, 5, 4, 5, 4, 3, 6, 6, 8, 5, 5, 4, 2, 5, 7, 5};
	double tValue_expected = -1.71345710765;
	double pValue_expected = 0.100686;

	QTest::newRow("invisible cloak") << col1Data << col2Data << tValue_expected << pValue_expected;

	// Second Sample
	// This data set is taken from "JASP"
	col1Data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	col2Data = {42, 46, 43, 10, 55, 17, 26, 60, 62, 53, 37, 42, 33, 37, 41, 42, 19, 55, 54, 28, 20, 48, 85, 24, 56, 43, 59, 58, 52, 71, 62, 43, 54, 49, 57, 61, 33, 44, 46, 67, 43, 49, 57, 53};
	tValue_expected = -2.2665512460934725;
	pValue_expected = 0.028629483;

	QTest::newRow("directed control activities") << col1Data << col2Data << tValue_expected << pValue_expected;
}

void TTestTest::twoSampleIndependent() {
	QFETCH(QVector<double>, col1Data);
	QFETCH(QVector<double>, col2Data);
	QFETCH(double, tValue_expected);
	QFETCH(double, pValue_expected);

	Column* col1 = new Column("col1", AbstractColumn::Numeric);
	Column* col2 = new Column("col2", AbstractColumn::Numeric);

	col1->replaceValues(0, col1Data);
	col2->replaceValues(0, col2Data);

	QVector<Column*> cols;
	cols << col1 << col2;

	HypothesisTest tTest("Two Sample Independent");
	tTest.setColumns(cols);

-	HypothesisTest::Test test;
-	test.type = HypothesisTest::Test::Type::TTest;
-	test.subtype = HypothesisTest::Test::SubType::TwoSampleIndependent;
-	test.tail = HypothesisTest::Test::Tail::Two;
+	int test;
+	test = HypothesisTest::TTest;
+	test |= HypothesisTest::TwoSampleIndependent;
+	tTest.setTail(HypothesisTest::Two);

	bool categoricalVariable = true;
	bool equalVariance = true;

	tTest.performTest(test, categoricalVariable, equalVariance);
	double tValue = tTest.statisticValue()[0];
	double pValue = tTest.pValue()[0];

	qDebug() << "tValue is " << tValue;
	qDebug() << "pValue is: " << pValue;
	qDebug() << "tValue_expected is " << tValue_expected;
	qDebug() << "pValue_expected is: " << pValue_expected;

	FuzzyCompare(tValue, tValue_expected, 1.e-5);
	FuzzyCompare(pValue, pValue_expected, 1.e-5);
}

void TTestTest::twoSamplePaired_data() {
	QTest::addColumn<QVector<double>>("col1Data");
	QTest::addColumn<QVector<double>>("col2Data");
	QTest::addColumn<double>("tValue_expected");
	QTest::addColumn<double>("pValue_expected");

	// First Sample
	// This data set is taken from "JASP"
	// DATA SET:: Moon and Aggression
	QVector<double> col1Data = {3.33, 3.67, 2.67, 3.33, 3.33, 3.67, 4.67, 2.67, 6, 4.33, 3.33, 0.67, 1.33, 0.33, 2};
	QVector<double> col2Data = {0.27, 0.59, 0.32, 0.19, 1.26, 0.11, 0.3, 0.4, 1.59, 0.6, 0.65, 0.69, 1.26, 0.23, 0.38};
	double tValue_expected = 6.451788554;
	double pValue_expected = 1.51815e-05;

	QTest::newRow("Moon and Aggression") << col1Data << col2Data << tValue_expected << pValue_expected;
}

void TTestTest::twoSamplePaired() {
	QFETCH(QVector<double>, col1Data);
	QFETCH(QVector<double>, col2Data);
	QFETCH(double, tValue_expected);
	QFETCH(double, pValue_expected);

	Column* col1 = new Column("col1", AbstractColumn::Numeric);
	Column* col2 = new Column("col2", AbstractColumn::Numeric);

	col1->replaceValues(0, col1Data);
	col2->replaceValues(0, col2Data);

	QVector<Column*> cols;
	cols << col1 << col2;

	HypothesisTest tTest("Two Sample Paried");
	tTest.setColumns(cols);

-	HypothesisTest::Test test;
-	test.type = HypothesisTest::Test::Type::TTest;
-	test.subtype = HypothesisTest::Test::SubType::TwoSamplePaired;
-	test.tail = HypothesisTest::Test::Tail::Two;
+	int test;
+	test = HypothesisTest::TTest;
+	test |= HypothesisTest::TwoSamplePaired;
+	tTest.setTail(HypothesisTest::Two);

	tTest.performTest(test);
	double tValue = tTest.statisticValue()[0];
	double pValue = tTest.pValue()[0];

	qDebug() << "tValue is " << tValue;
	qDebug() << "pValue is: " << pValue;
	qDebug() << "tValue_expected is " << tValue_expected;
	qDebug() << "pValue_expected is: " << pValue_expected;

	FuzzyCompare(tValue, tValue_expected, 1.e-5);
	FuzzyCompare(pValue, pValue_expected, 1.e-5);
}

void TTestTest::oneSample_data() {
	QTest::addColumn<QVector<double>>("col1Data");
	QTest::addColumn<double>("populationMean");
	QTest::addColumn<double>("tValue_expected");
	QTest::addColumn<double>("pValue_expected");

	// First Sample
	// This data set is taken from "JASP"
	// DATA SET:: Weight Gain;
	QVector<double> col1Data = {13.2, 8.58, 14.08, 8.58, 10.56, 14.74, 7.92, 13.2, 12.76, 5.72, 11.66, 7.04, 3.08, 15.62, 14.3, 5.5};
	double populationMean = 16;
	double tValue_expected = -5.823250303;
	double pValue_expected = 3.35479e-05;

	QTest::newRow("weight gain") << col1Data << populationMean << tValue_expected << pValue_expected;
}

void TTestTest::oneSample() {
	QFETCH(QVector<double>, col1Data);
	QFETCH(double, populationMean);
	QFETCH(double, tValue_expected);
	QFETCH(double, pValue_expected);

	Column* col1 = new Column("col1", AbstractColumn::Numeric);
	col1->replaceValues(0, col1Data);

	QVector<Column*> cols;
	cols << col1;

	HypothesisTest tTest("One Sample");
	tTest.setColumns(cols);
	tTest.setPopulationMean(populationMean);

-	HypothesisTest::Test test;
-	test.type = HypothesisTest::Test::Type::TTest;
-	test.subtype = HypothesisTest::Test::SubType::OneSample;
-	test.tail = HypothesisTest::Test::Tail::Two;
+	int test;
+	test = HypothesisTest::TTest;
+	test |= HypothesisTest::OneSample;
+	tTest.setTail(HypothesisTest::Two);

	tTest.performTest(test);
	double tValue = tTest.statisticValue()[0];
	double pValue = tTest.pValue()[0];

	qDebug() << "tValue is " << tValue;
	qDebug() << "pValue is: " << pValue;
	qDebug() << "tValue_expected is " << tValue_expected;
	qDebug() << "pValue_expected is: " << pValue_expected;

	FuzzyCompare(tValue, tValue_expected, 1.e-5);
	FuzzyCompare(pValue, pValue_expected, 1.e-5);
}

QTEST_MAIN(TTestTest)