diff --git a/src/backend/generalTest/CorrelationCoefficient.cpp b/src/backend/generalTest/CorrelationCoefficient.cpp index 0ccc06eb3..43b0c0533 100644 --- a/src/backend/generalTest/CorrelationCoefficient.cpp +++ b/src/backend/generalTest/CorrelationCoefficient.cpp @@ -1,419 +1,433 @@ /*************************************************************************** File : CorrelationCoefficient.cpp Project : LabPlot Description : Finding Correlation Coefficient on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "CorrelationCoefficient.h" #include "GeneralTest.h" #include "kdefrontend/generalTest/CorrelationCoefficientView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" #include #include #include #include #include #include #include #include #include #include #include #include extern "C" { #include "backend/nsl/nsl_stats.h" } CorrelationCoefficient::CorrelationCoefficient(const QString &name) : GeneralTest (name, AspectType::CorrelationCoefficient) { } CorrelationCoefficient::~CorrelationCoefficient() { } void CorrelationCoefficient::performTest(Test test, bool categoricalVariable) { + //QDEBUG("in perform test"); + m_statsTable = ""; m_tooltips.clear(); + m_correlationValue = 0; + m_statisticValue.clear(); + m_pValue.clear(); for (int i = 0; i < RESULTLINESCOUNT; i++) m_resultLine[i]->clear(); switch (test) { case CorrelationCoefficient::Test::Pearson: { m_currTestName = "

" + i18n("Pearson's r Correlation Test") + "

"; performPearson(categoricalVariable); break; } case CorrelationCoefficient::Test::Kendall: m_currTestName = "

" + i18n("Kendall's Rank Correlation Test") + "

"; performKendall(); break; case CorrelationCoefficient::Test::Spearman: { m_currTestName = "

" + i18n("Spearman Correlation Coefficient Test") + "

"; performSpearman(); break; } } emit changed(); } -double CorrelationCoefficient::correlationValue() { +double CorrelationCoefficient::correlationValue() const{ return m_correlationValue; } +QList CorrelationCoefficient::statisticValue() const{ + return m_statisticValue; +} + +QList CorrelationCoefficient::pValue() const{ + return m_pValue; +} /*************************************************************************************************************************** * Private Implementations * ************************************************************************************************************************/ /*********************************************Pearson r ******************************************************************/ //Formulaes are taken from https://www.statisticssolutions.com/correlation-pearson-kendall-spearman/ // variables: // N = total number of observations // sumColx = sum of values in colx // sumSqColx = sum of square of values in colx // sumColxColy = sum of product of values in colx and coly //TODO: support for col1 is categorical. -//TODO: add symbols in stats table header. 
//TODO: add automatic test //TODO: add tooltip for correlation value result //TODO: find p value void CorrelationCoefficient::performPearson(bool categoricalVariable) { + + //QDEBUG("in pearson"); if (m_columns.count() != 2) { printError("Select only 2 columns "); return; } if (categoricalVariable) { printLine(1, "currently categorical variable not supported", "blue"); return; } QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); if (!isNumericOrInteger(m_columns[1])) { printError("Column " + col2Name + " should contain only numeric or interger values"); } int N = findCount(m_columns[0]); if (N != findCount(m_columns[1])) { printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal"); return; } double sumCol1 = findSum(m_columns[0], N); double sumCol2 = findSum(m_columns[1], N); double sumSqCol1 = findSumSq(m_columns[0], N); double sumSqCol2 = findSumSq(m_columns[1], N); double sumCol12 = 0; for (int i = 0; i < N; i++) sumCol12 += m_columns[0]->valueAt(i) * m_columns[1]->valueAt(i); // printing table; // cell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader; QList rowMajor; int level = 0; // horizontal header + QString sigma = UTF8_QSTRING("Σ"); rowMajor.append(new Cell("", level, true)); + rowMajor.append(new Cell("N", level, true, "Total Number of Observations")); - rowMajor.append(new Cell("Sigma", level, true, "Sum of Scores in each column")); - rowMajor.append(new Cell("Sigma x2", level, true, "Sum of Squares of scores in each column")); - rowMajor.append(new Cell("Sigma xy", level, true, "Sum of Squares of scores in each column")); + rowMajor.append(new Cell(QString(sigma + "Scores"), level, true, "Sum of Scores in each column")); + rowMajor.append(new Cell(QString(sigma + "Scores2"), level, true, "Sum of Squares of scores in each column")); + rowMajor.append(new Cell(QString(sigma + "(" + UTF8_QSTRING("∏") + "Scores)"), level, true, "Sum of product of 
scores of both columns")); //data with vertical header. level++; rowMajor.append(new Cell(col1Name, level, true)); rowMajor.append(new Cell(N, level)); rowMajor.append(new Cell(sumCol1, level)); rowMajor.append(new Cell(sumSqCol1, level)); rowMajor.append(new Cell(sumCol12, level, false, "", 2, 1)); level++; rowMajor.append(new Cell(col2Name, level, true)); rowMajor.append(new Cell(N, level)); rowMajor.append(new Cell(sumCol2, level)); rowMajor.append(new Cell(sumSqCol2, level)); m_statsTable += getHtmlTable3(rowMajor); m_correlationValue = (N * sumCol12 - sumCol1*sumCol2) / sqrt((N * sumSqCol1 - gsl_pow_2(sumCol1)) * (N * sumSqCol2 - gsl_pow_2(sumCol2))); printLine(0, QString("Correlation Value is %1").arg(round(m_correlationValue)), "green"); } /***********************************************Kendall ******************************************************************/ // used knight algorithm for fast performance O(nlogn) rather than O(n^2) // http://adereth.github.io/blog/2013/10/30/efficiently-computing-kendalls-tau/ // TODO: Change date format type to original for numeric type; // TODO: add tooltips. // TODO: Compute tauB for ties. // TODO: find P Value from Z Value void CorrelationCoefficient::performKendall() { if (m_columns.count() != 2) { printError("Select only 2 columns "); return; } QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); int N = findCount(m_columns[0]); if (N != findCount(m_columns[1])) { printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal"); return; } int col2Ranks[N]; if (isNumericOrInteger(m_columns[0]) || isNumericOrInteger(m_columns[1])) { if (isNumericOrInteger(m_columns[0]) && isNumericOrInteger(m_columns[1])) { for (int i = 0; i < N; i++) col2Ranks[int(m_columns[0]->valueAt(i)) - 1] = int(m_columns[1]->valueAt(i)); } else { printError(QString("Ranking System should be same for both Column: %1 and Column: %2
" "Hint: Check for data types of columns").arg(col1Name).arg(col2Name)); return; } } else { AbstractColumn::ColumnMode origCol1Mode = m_columns[0]->columnMode(); AbstractColumn::ColumnMode origCol2Mode = m_columns[1]->columnMode(); m_columns[0]->setColumnMode(AbstractColumn::Text); m_columns[1]->setColumnMode(AbstractColumn::Text); QMap ValueToRank; for (int i = 0; i < N; i++) { if (ValueToRank[m_columns[0]->textAt(i)] != 0) { printError("Currently ties are not supported"); m_columns[0]->setColumnMode(origCol1Mode); m_columns[1]->setColumnMode(origCol2Mode); return; } ValueToRank[m_columns[0]->textAt(i)] = i + 1; } for (int i = 0; i < N; i++) col2Ranks[i] = ValueToRank[m_columns[1]->textAt(i)]; m_columns[0]->setColumnMode(origCol1Mode); m_columns[1]->setColumnMode(origCol2Mode); } int nPossiblePairs = (N * (N - 1)) / 2; int nDiscordant = findDiscordants(col2Ranks, 0, N - 1); int nCorcordant = nPossiblePairs - nDiscordant; - double tauA = double(nCorcordant - nDiscordant) / nPossiblePairs; + double m_correlationValue = double(nCorcordant - nDiscordant) / nPossiblePairs; - double zA = (3 * (nCorcordant - nDiscordant)) / - sqrt(N * (N- 1) * (2 * N + 5) / 2); + m_statisticValue.append((3 * (nCorcordant - nDiscordant)) / + sqrt(N * (N- 1) * (2 * N + 5) / 2)); printLine(0 , QString("Number of Discordants are %1").arg(nDiscordant), "green"); printLine(1 , QString("Number of Concordant are %1").arg(nCorcordant), "green"); - printLine(2 , QString("Tau a is %1").arg(round(tauA)), "green"); - printLine(3 , QString("Z Value is %1").arg(round(zA)), "green"); + printLine(2 , QString("Tau a is %1").arg(round(m_correlationValue)), "green"); + printLine(3 , QString("Z Value is %1").arg(round(m_statisticValue[0])), "green"); - m_correlationValue = tauA; return; } /***********************************************Spearman ******************************************************************/ // All formulaes and symbols are taken from : 
https://www.statisticshowto.datasciencecentral.com/spearman-rank-correlation-definition-calculate/ void CorrelationCoefficient::performSpearman() { if (m_columns.count() != 2) { printError("Select only 2 columns "); return; } QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); int N = findCount(m_columns[0]); if (N != findCount(m_columns[1])) { printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal"); return; } QMap col1Ranks; convertToRanks(m_columns[0], N, col1Ranks); QMap col2Ranks; convertToRanks(m_columns[1], N, col2Ranks); double ranksCol1Mean = 0; double ranksCol2Mean = 0; // QString ranks1 = ""; // QString ranks2 = ""; for (int i = 0; i < N; i++) { ranksCol1Mean += col1Ranks[int(m_columns[0]->valueAt(i))]; ranksCol2Mean += col2Ranks[int(m_columns[1]->valueAt(i))]; // ranks1 += ", " + QString::number(col1Ranks[m_columns[0]->valueAt(i)]); // ranks2 += ", " + QString::number(col2Ranks[m_columns[1]->valueAt(i)]); } ranksCol1Mean = ranksCol1Mean / N; ranksCol2Mean = ranksCol2Mean / N; //QDEBUG("ranks 1 and ranks2 are " ); //QDEBUG(ranks1); //QDEBUG(ranks2); //QDEBUG("Mean ranks are " << ranksCol1Mean << ranksCol2Mean); double s12 = 0; double s1 = 0; double s2 = 0; for (int i = 0; i < N; i++) { double centeredRank_1 = col1Ranks[int(m_columns[0]->valueAt(i))] - ranksCol1Mean; double centeredRank_2 = col2Ranks[int(m_columns[1]->valueAt(i))] - ranksCol2Mean; s12 += centeredRank_1 * centeredRank_2; s1 += gsl_pow_2(centeredRank_1); s2 += gsl_pow_2(centeredRank_2); } s12 = s12 / N; s1 = s1 / N; s2 = s2 / N; //QDEBUG("s12, s1, s2 are " << s12 << " " << s1 << " " << s2); m_correlationValue = s12 / std::sqrt(s1 * s2); printLine(0, QString("Spearman Rank Correlation value is %1").arg(m_correlationValue), "green"); } /***********************************************Helper Functions******************************************************************/ int CorrelationCoefficient::findDiscordants(int 
*ranks, int start, int end) { if (start >= end) return 0; int mid = (start + end) / 2; int leftDiscordants = findDiscordants(ranks, start, mid); int rightDiscordants = findDiscordants(ranks, mid + 1, end); int len = end - start + 1; int leftLen = mid - start + 1; int rightLen = end - mid; int leftLenRemain = leftLen; int leftRanks[leftLen]; int rightRanks[rightLen]; for (int i = 0; i < leftLen; i++) leftRanks[i] = ranks[start + i]; for (int i = leftLen; i < leftLen + rightLen; i++) rightRanks[i - leftLen] = ranks[start + i]; int mergeDiscordants = 0; int i = 0, j = 0, k =0; while (i < len) { if (j >= leftLen) { ranks[start + i] = rightRanks[k]; k++; } else if (k >= rightLen) { ranks[start + i] = leftRanks[j]; j++; } else if (leftRanks[j] < rightRanks[k]) { ranks[start + i] = leftRanks[j]; j++; leftLenRemain--; } else if (leftRanks[j] > rightRanks[k]) { ranks[start + i] = rightRanks[k]; mergeDiscordants += leftLenRemain; k++; } i++; } return leftDiscordants + rightDiscordants + mergeDiscordants; } void CorrelationCoefficient::convertToRanks(const Column* col, int N, QMap &ranks) { if (!isNumericOrInteger(col)) return; //QDEBUG("in convert to ranks"); double* sortedList = new double[N]; for (int i = 0; i < N; i++) sortedList[i] = col->valueAt(i); std::sort(sortedList, sortedList + N, std::greater()); // QString debug_sortedList = ""; ranks.clear(); for (int i = 0; i < N; i++) { ranks[sortedList[i]] = i + 1; // debug_sortedList += ", " + QString::number(sortedList[i]); } //QDEBUG("sorted list is " << debug_sortedList); delete[] sortedList; } void CorrelationCoefficient::convertToRanks(const Column* col, QMap &ranks) { convertToRanks(col, findCount(col), ranks); } /***********************************************Virtual Functions******************************************************************/ QWidget* CorrelationCoefficient::view() const { if (!m_partView) { m_view = new CorrelationCoefficientView(const_cast(this)); m_partView = m_view; } return m_partView; } diff 
--git a/src/backend/generalTest/CorrelationCoefficient.h b/src/backend/generalTest/CorrelationCoefficient.h index 241bbe754..24af099db 100644 --- a/src/backend/generalTest/CorrelationCoefficient.h +++ b/src/backend/generalTest/CorrelationCoefficient.h @@ -1,72 +1,78 @@ -/*************************************************************************** +/*************************************************************************** File : CorrelationCoefficient.h Project : LabPlot Description : Finding Correlation Coefficient on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef CORRELATIONCOEFFICIENT_H #define CORRELATIONCOEFFICIENT_H #include "backend/core/AbstractPart.h" #include "GeneralTest.h" #include "backend/lib/macros.h" class CorrelationCoefficientView; class Spreadsheet; class QString; class Column; class QVBoxLayout; class QLabel; class CorrelationCoefficient : public GeneralTest { Q_OBJECT public: explicit CorrelationCoefficient(const QString& name); ~CorrelationCoefficient() override; enum Test{ Pearson, Kendall, Spearman }; - double correlationValue(); + + double correlationValue() const; + QList statisticValue() const; + QList pValue() const; + QWidget* view() const override; void performTest(Test m_test, bool categoricalVariable = true); private: void performPearson(bool categoricalVariable); void performKendall(); void performSpearman(); int findDiscordants(int* ranks, int start, int end); void convertToRanks(const Column* col, int N, QMap &ranks); void convertToRanks(const Column* col, QMap &ranks); double m_correlationValue; + QList m_statisticValue; + QList m_pValue; }; #endif // CORRELATIONCOEFFICIENT_H diff --git a/src/backend/generalTest/HypothesisTest.cpp b/src/backend/generalTest/HypothesisTest.cpp index b550c4357..16057794b 100644 --- a/src/backend/generalTest/HypothesisTest.cpp +++ b/src/backend/generalTest/HypothesisTest.cpp @@ -1,1143 +1,1143 @@ /*************************************************************************** File : HypothesisTest.cpp Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) 
***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "HypothesisTest.h" #include "kdefrontend/generalTest/HypothesisTestView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" #include #include #include #include #include #include #include #include #include #include #include extern "C" { #include "backend/nsl/nsl_stats.h" } HypothesisTest::HypothesisTest(const QString &name) : GeneralTest (name, AspectType::HypothesisTest) { } HypothesisTest::~HypothesisTest() { } void HypothesisTest::setPopulationMean(QVariant m_populationMean) { m_populationMean = m_populationMean.toDouble(); } void HypothesisTest::setSignificanceLevel(QVariant alpha) { m_significanceLevel = alpha.toDouble(); } void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) { m_tailType = test.tail; m_pValue.clear(); m_statisticValue.clear(); m_statsTable = ""; m_tooltips.clear(); for (int i = 0; i < RESULTLINESCOUNT; i++) m_resultLine[i]->clear(); switch (test.subtype) { case 
HypothesisTest::Test::SubType::TwoSampleIndependent: { m_currTestName = "

" + i18n("Two Sample Independent Test") + "

"; performTwoSampleIndependentTest(test.type, categoricalVariable, equalVariance); break; } case HypothesisTest::Test::SubType::TwoSamplePaired: m_currTestName = "

" + i18n("Two Sample Paired Test") + "

"; performTwoSamplePairedTest(test.type); break; case HypothesisTest::Test::SubType::OneSample: { m_currTestName = "

" + i18n("One Sample Test") + "

"; performOneSampleTest(test.type); break; } case HypothesisTest::Test::SubType::OneWay: { m_currTestName = "

" + i18n("One Way Anova") + "

"; performOneWayAnova(); break; } case HypothesisTest::Test::SubType::TwoWay: { m_currTestName = "

" + i18n("Two Way Anova") + "

"; performTwoWayAnova(); break; } case HypothesisTest::Test::SubType::NoneSubType: break; } emit changed(); } void HypothesisTest::performLeveneTest(bool categoricalVariable) { m_pValue.clear(); m_statisticValue.clear(); m_statsTable = ""; m_tooltips.clear(); for (int i = 0; i < RESULTLINESCOUNT; i++) m_resultLine[i]->clear(); m_currTestName = "

" + i18n("Levene Test for Equality of Variance") + "

"; m_performLeveneTest(categoricalVariable); emit changed(); } -QList HypothesisTest::statisticValue() { +QList& HypothesisTest::statisticValue(){ return m_statisticValue; } -QList HypothesisTest::pValue() { +QList& HypothesisTest::pValue(){ return m_pValue; } /****************************************************************************** * Private Implementations * ****************************************************************************/ //TODO: backend of z test; //TODO: add tooltip to tables. (currently it is not possible to use with QTextDocument); //TODO: use https://www.gnu.org/software/gsl/doc/html/statistics.html for basic statistic calculations /**************************Two Sample Independent *************************************/ void HypothesisTest::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) { if (m_columns.size() != 2) { printError("Inappropriate number of m_columns selected"); return; } int n[2]; double sum[2], mean[2], std[2]; QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); if (!categoricalVariable && isNumericOrInteger(m_columns[0])) { for (int i = 0; i < 2; i++) { findStats(m_columns[i], n[i], sum[i], mean[i], std[i]); if (n[i] == 0) { printError("Atleast two values should be there in every column"); return; } if (std[i] <= 0) { printError(i18n("Standard Deviation of atleast one column is equal to 0: last column is: %1", m_columns[i]->name())); return; } } } else { QMap colName; QString baseColName; int np; int totalRows; countPartitions(m_columns[0], np, totalRows); if (np != 2) { printError( i18n("Number of Categorical Variable in Column %1 is not equal to 2", m_columns[0]->name())); return; } if (isNumericOrInteger(m_columns[0])) baseColName = m_columns[0]->name(); ErrorType errorCode = findStatsCategorical(m_columns[0], m_columns[1], n, sum, mean, std, colName, np, totalRows); switch (errorCode) { case ErrorUnqualSize: { printError( 
i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name())); return; } case ErrorEmptyColumn: { printError("At least one of selected column is empty"); return; } case NoError: break; } QMapIterator i(colName); while (i.hasNext()) { i.next(); if (i.value() == 1) col1Name = baseColName + " " + i.key(); else col2Name = baseColName + " " + i.key(); } } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", col1Name, n[0], sum[0], mean[0], std[0], col2Name, n[1], sum[1], mean[1], std[1] }; m_statsTable = getHtmlTable(3, 5, rowMajor); for (int i = 0; i < 2; i++) { if (n[i] == 0) { printError("Atleast two values should be there in every column"); return; } if (std[i] <= 0) { printError( i18n("Standard Deviation of atleast one column is equal to 0: last column is: %1", m_columns[i]->name())); return; } } QString testName; int df = 0; double sp = 0; switch (test) { case HypothesisTest::Test::Type::TTest: { testName = "T"; if (equalVariance) { df = n[0] + n[1] - 2; sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1]) ) / df ); m_statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1]))); printLine(9, "Assumption: Equal Variance b/w both population means"); } else { double temp_val; temp_val = gsl_pow_2( gsl_pow_2(std[0]) / n[0] + gsl_pow_2(std[1]) / n[1]); temp_val = temp_val / ( (gsl_pow_2( (gsl_pow_2(std[0]) / n[0]) ) / (n[0]-1)) + (gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1))); df = qRound(temp_val); m_statisticValue.append((mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) + (gsl_pow_2(std[1])/n[1])))); printLine(9, "Assumption: UnEqual Variance b/w both population means"); } printLine(8, "Assumption: Both Populations approximately follow normal distribution"); break; } case HypothesisTest::Test::Type::ZTest: { testName = "Z"; sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df); m_statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / 
n[1]))); // m_pValue.append(gsl_cdf_gaussian_P(m_statisticValue, sp)); break; } case HypothesisTest::Test::Type::Anova: case HypothesisTest::Test::Type::NoneType: break; } m_currTestName = "

" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "

"; m_pValue.append(getPValue(test, m_statisticValue[0], col1Name, col2Name, (mean[0] - mean[1]), sp, df)); printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue"); printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green"); printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName)); printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate")); if (m_pValue[0] <= m_significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(m_significanceLevel))); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /********************************Two Sample Paired ***************************************/ void HypothesisTest::performTwoSamplePairedTest(HypothesisTest::Test::Type test) { if (m_columns.size() != 2) { printError("Inappropriate number of m_columns selected"); return; } for (int i = 0; i < 2; i++) { if ( !isNumericOrInteger(m_columns[0])) { printError("select only m_columns with numbers"); return; } } int n; double sum, mean, std; ErrorType errorCode = findStatsPaired(m_columns[0], m_columns[1], n, sum, mean, std); switch (errorCode) { case ErrorUnqualSize: { printError("both m_columns are having different sizes"); return; } case ErrorEmptyColumn: { printError("m_columns are empty"); return; } case NoError: break; } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", "difference", n, sum, mean, std }; m_statsTable = getHtmlTable(2, 5, rowMajor); if (std <= 0) { printError("Standard deviation of the difference is 0"); return; } QString testName; int df = 0; switch (test) { case HypothesisTest::Test::Type::TTest: { m_statisticValue[0] = mean / (std / qSqrt(n)); df = n - 1; testName = "T"; printLine(6, i18n("Degree 
of Freedom is %1name(), i18n("%1", m_populationMean), mean, std, df)); m_currTestName = "

" + i18n("One Sample %1 Test for %2 vs %3", testName, m_columns[0]->name(), m_columns[1]->name()) + "

"; printLine(2, i18n("Significance level is %1 ", round(m_significanceLevel)), "blue"); printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green"); printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green"); if (m_pValue[0] <= m_significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /******************************** One Sample ***************************************/ void HypothesisTest::performOneSampleTest(HypothesisTest::Test::Type test) { if (m_columns.size() != 1) { printError("Inappropriate number of m_columns selected"); return; } if ( !isNumericOrInteger(m_columns[0])) { printError("select only m_columns with numbers"); return; } int n; double sum, mean, std; ErrorType errorCode = findStats(m_columns[0], n, sum, mean, std); switch (errorCode) { case ErrorEmptyColumn: { printError("column is empty"); return; } case NoError: break; case ErrorUnqualSize: { return; } } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", m_columns[0]->name(), n, sum, mean, std }; m_statsTable = getHtmlTable(2, 5, rowMajor); if (std <= 0) { printError("Standard deviation is 0"); return; } QString testName; int df = 0; switch (test) { case HypothesisTest::Test::Type::TTest: { testName = "T"; m_statisticValue.append((mean - m_populationMean) / (std / qSqrt(n))); df = n - 1; printLine(6, i18n("Degree of Freedom is %1", df), "blue"); break; } case HypothesisTest::Test::Type::ZTest: { testName = "Z"; df = 0; m_statisticValue.append((mean - m_populationMean) / (std / qSqrt(n))); break; } case HypothesisTest::Test::Type::Anova: case HypothesisTest::Test::Type::NoneType: break; } m_pValue.append(getPValue(test, m_statisticValue[0], m_columns[0]->name(), i18n("%1",m_populationMean), mean - m_populationMean, std, df)); m_currTestName = "

" + i18n("One Sample %1 Test for %2", testName, m_columns[0]->name()) + "

"; printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue"); printLine(4, i18n("%1 Value is %2", testName, round(m_statisticValue[0])), "green"); printLine(5, i18n("P Value is %1", m_pValue[0]), "green"); if (m_pValue[0] <= m_significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /*************************************One Way Anova***************************************/ // all standard variables and formulas are taken from this wikipedia page: // https://en.wikipedia.org/wiki/One-way_analysis_of_variance // b stands for b/w groups // w stands for within groups // np is number of partition i.e., number of classes void HypothesisTest::performOneWayAnova() { int np, totalRows; countPartitions(m_columns[0], np, totalRows); int* ni = new int[np]; double* sum = new double[np]; double* mean = new double[np]; double* std = new double[np]; QString* colNames = new QString[np]; QMap classnameToIndex; QString baseColName; if (isNumericOrInteger(m_columns[0])) baseColName = m_columns[0]->name(); findStatsCategorical(m_columns[0], m_columns[1], ni, sum, mean, std, classnameToIndex, np, totalRows); double yBar = 0; // overall mean double sB = 0; // sum of squares of (mean - overall_mean) between the groups int fB = 0; // degree of freedom between the groups double msB = 0; // mean sum of squares between the groups double sW = 0; // sum of squares of (value - mean of group) within the groups int fW = 0; // degree of freedom within the group double msW = 0; // mean sum of squares within the groups // now finding mean of each group; for (int i = 0; i < np; i++) yBar += mean[i]; yBar = yBar / np; for (int i = 0; i < np; i++) { sB += ni[i] * gsl_pow_2( ( mean[i] - yBar)); if (ni[i] > 1) sW += gsl_pow_2( std[i])*(ni[i] - 1); else sW += gsl_pow_2( std[i]); fW += ni[i] - 1; } fB = np - 1; msB = 
sB / fB; msW = sW / fW; m_statisticValue.append(msB / msW); m_pValue.append(nsl_stats_fdist_p(m_statisticValue[0], static_cast(np-1), fW)); QMapIterator i(classnameToIndex); while (i.hasNext()) { i.next(); colNames[i.value()-1] = baseColName + " " + i.key(); } // now printing the statistics and result; int rowCount = np + 1, columnCount = 5; QVariant* rowMajor = new QVariant[rowCount*columnCount]; // header data; rowMajor[0] = ""; rowMajor[1] = "Ni"; rowMajor[2] = "Sum"; rowMajor[3] = "Mean"; rowMajor[4] = "Std"; // table data for (int row_i = 1; row_i < rowCount ; row_i++) { rowMajor[row_i*columnCount] = colNames[row_i - 1]; rowMajor[row_i*columnCount + 1] = ni[row_i - 1]; rowMajor[row_i*columnCount + 2] = sum[row_i - 1]; rowMajor[row_i*columnCount + 3] = mean[row_i - 1]; rowMajor[row_i*columnCount + 4] = std[row_i - 1]; } m_statsTable = "

" + i18n("Group Summary Statistics") + "

"; m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor); m_statsTable += getLine(""); m_statsTable += getLine(""); m_statsTable += "

" + i18n("Grand Summary Statistics") + "

"; m_statsTable += getLine(""); m_statsTable += getLine(i18n("Overall Mean is %1", round(yBar))); rowCount = 4; columnCount = 3; rowMajor->clear(); rowMajor[0] = ""; rowMajor[1] = "Between Groups"; rowMajor[2] = "Within Groups"; int baseIndex = 0; baseIndex = 1 * columnCount; rowMajor[baseIndex + 0] = "Sum of Squares"; rowMajor[baseIndex + 1] = sB; rowMajor[baseIndex + 2] = sW; baseIndex = 2 * columnCount; rowMajor[baseIndex + 0] = "Degree of Freedom"; rowMajor[baseIndex + 1] = fB; rowMajor[baseIndex + 2] = fW; baseIndex = 3 * columnCount; rowMajor[baseIndex + 0] = "Mean Square Value"; rowMajor[baseIndex + 1] = msB; rowMajor[baseIndex + 2] = msW; m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor); delete[] ni; delete[] sum; delete[] mean; delete[] std; delete[] colNames; printLine(1, i18n("F Value is %1", round(m_statisticValue[0])), "green"); printLine(2, i18n("P Value is %1 ", m_pValue[0]), "green"); if (m_pValue[0] <= m_significanceLevel) printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); else printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /*************************************Two Way Anova***************************************/ // all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf //TODO: suppress warning of variable length array are a C99 feature. 
//TODO: add assumptions verification option //TODO: add tail option (if needed) void HypothesisTest::performTwoWayAnova() { int np_a, totalRows_a; int np_b, totalRows_b; countPartitions(m_columns[0], np_a, totalRows_a); countPartitions(m_columns[1], np_b, totalRows_b); double groupMean[np_a][np_b]; int replicates[np_a][np_b]; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) { groupMean[i][j] = 0; replicates[i][j] = 0; } if (totalRows_a != totalRows_b) { printError("There is missing data in atleast one of the rows"); return; } QMap catToNumber_a; QMap catToNumber_b; int partitionNumber_a = 1; int partitionNumber_b = 1; for (int i = 0; i < totalRows_a; i++) { QString name_a = m_columns[0]->textAt(i); QString name_b = m_columns[1]->textAt(i); double value = m_columns[2]->valueAt(i); if (catToNumber_a[name_a] == 0) { catToNumber_a[name_a] = partitionNumber_a; partitionNumber_a++; } if (catToNumber_b[name_b] == 0) { catToNumber_b[name_b] = partitionNumber_b; partitionNumber_b++; } groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += value; replicates[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += 1; } int replicate = replicates[0][0]; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) { if (replicates[i][j] == 0) { printError("Dataset should have atleast one data value corresponding to each feature combination"); return; } if (replicates[i][j] != replicate) { printError("Number of experiments perfomed for each combination of levels
" "between Independet Var.1 and Independent Var.2 must be equal"); return; } groupMean[i][j] /= replicates[i][j]; } double ss_within = 0; for (int i = 0; i < totalRows_a; i++) { QString name_a = m_columns[0]->textAt(i); QString name_b = m_columns[1]->textAt(i); double value = m_columns[2]->valueAt(i); ss_within += gsl_pow_2(value - groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1]); } int df_within = (replicate - 1) * np_a * np_b; double ms_within = ss_within / df_within; double* mean_a = new double[np_a]; double* mean_b = new double[np_b]; for (int i = 0; i < np_a; i++) { for (int j = 0; j < np_b; j++) { mean_a[i] += groupMean[i][j] / np_b; mean_b[j] += groupMean[i][j] / np_a; } } double mean = 0; for (int i = 0; i < np_a; i++) mean += mean_a[i] / np_a; double ss_a = 0; for (int i = 0; i < np_a; i++) ss_a += gsl_pow_2(mean_a[i] - mean); ss_a *= replicate * np_b; int df_a = np_a - 1; double ms_a = ss_a / df_a; double ss_b = 0; for (int i = 0; i < np_b; i++) ss_b += gsl_pow_2(mean_b[i] - mean); ss_b *= replicate * np_a; int df_b = np_b - 1; double ms_b = ss_b / df_b; double ss_interaction = 0; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean); ss_interaction *= replicate; int df_interaction = (np_a - 1) * (np_b - 1); double ms_interaction = ss_interaction / df_interaction; QString* partitionNames_a = new QString[np_a]; QString* partitionNames_b = new QString[np_b]; QMapIterator itr_a(catToNumber_a); while (itr_a.hasNext()) { itr_a.next(); partitionNames_a[itr_a.value()-1] = itr_a.key(); } QMapIterator itr_b(catToNumber_b); while (itr_b.hasNext()) { itr_b.next(); partitionNames_b[itr_b.value()-1] = itr_b.key(); } // printing table; // cell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader; QList rowMajor; rowMajor.append(new Cell("", 0, true, "", 2, 1)); for (int i = 0; i < np_b; i++) rowMajor.append(new Cell(partitionNames_b[i], 0, true, 
"", 1, 2)); rowMajor.append(new Cell("Mean", 0, true, "", 2)); for (int i = 0; i < np_b; i++) { rowMajor.append(new Cell("Mean", 1, true)); rowMajor.append(new Cell("Replicate", 1, true)); } int level = 2; for (int i = 0; i < np_a; i++) { rowMajor.append(new Cell(partitionNames_a[i], level, true)); for (int j = 0; j < np_b; j++) { rowMajor.append(new Cell(round(groupMean[i][j]), level)); rowMajor.append(new Cell(replicates[i][j], level)); } rowMajor.append(new Cell(round(mean_a[i]), level)); level++; } rowMajor.append(new Cell("Mean", level, true)); for (int i = 0; i < np_b; i++) rowMajor.append(new Cell(round(mean_b[i]), level, false, "", 1, 2)); rowMajor.append(new Cell(round(mean), level)); m_statsTable = "

" + i18n("Contingency Table") + "

"; m_statsTable += getHtmlTable3(rowMajor); m_statsTable += "
"; m_statsTable += "

" + i18n("results table") + "

"; rowMajor.clear(); level = 0; rowMajor.append(new Cell("", level, true)); rowMajor.append(new Cell("SS", level, true)); rowMajor.append(new Cell("DF", level, true, "degree of freedom")); rowMajor.append(new Cell("MS", level, true)); level++; rowMajor.append(new Cell(m_columns[0]->name(), level, true)); rowMajor.append(new Cell(round(ss_a), level)); rowMajor.append(new Cell(df_a, level)); rowMajor.append(new Cell(round(ms_a), level)); level++; rowMajor.append(new Cell(m_columns[1]->name(), level, true)); rowMajor.append(new Cell(round(ss_b), level)); rowMajor.append(new Cell(df_b, level)); rowMajor.append(new Cell(round(ms_b), level)); level++; rowMajor.append(new Cell("Interaction", level, true)); rowMajor.append(new Cell(round(ss_interaction), level)); rowMajor.append(new Cell(df_interaction, level)); rowMajor.append(new Cell(round(ms_interaction), level)); level++; rowMajor.append(new Cell("Within", level, true)); rowMajor.append(new Cell(round(ss_within), level)); rowMajor.append(new Cell(df_within, level)); rowMajor.append(new Cell(round(ms_within), level)); m_statsTable += getHtmlTable3(rowMajor); double fValue_a = ms_a / ms_within; double fValue_b = ms_b / ms_within; double fValue_interaction = ms_interaction / ms_within; double m_pValue_a = nsl_stats_fdist_p(fValue_a, static_cast(np_a - 1), df_a); double m_pValue_b = nsl_stats_fdist_p(fValue_b, static_cast(np_b - 1), df_b); printLine(0, "F(df" + m_columns[0]->name() + ", dfwithin) is " + round(fValue_a), "blue"); printLine(1, "F(df" + m_columns[1]->name() + ", dfwithin) is " + round(fValue_b), "blue"); printLine(2, "F(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); printLine(4, "P(df" + m_columns[0]->name() + ", dfwithin) is " + round(m_pValue_a), "blue"); printLine(5, "P(df" + m_columns[1]->name() + ", dfwithin) is " + round(m_pValue_b), "blue"); // printLine(2, "P(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); m_statisticValue.append(fValue_a); 
m_statisticValue.append(fValue_b); m_statisticValue.append(fValue_interaction); m_pValue.append(m_pValue_a); m_pValue.append(m_pValue_b); delete[] mean_a; delete[] mean_b; delete[] partitionNames_a; delete[] partitionNames_b; return; } /**************************************Levene Test****************************************/ // Some reference to local variables. // np = number of partitions // df = degree of fredom // totalRows = total number of rows in column // these variables are taken from: https://en.wikipedia.org/wiki/Levene%27s_test // yiBar = mean of ith group; // Zij = |Yij - yiBar| // ziBar = mean of Zij for group i // ziBarBar = mean for all zij // ni = number of elements in group i void HypothesisTest::m_performLeveneTest(bool categoricalVariable) { if (m_columns.size() != 2) { printError("Inappropriate number of m_columns selected"); return; } int np = 0; int n = 0; if (!categoricalVariable && isNumericOrInteger(m_columns[0])) np = m_columns.size(); else countPartitions(m_columns[0], np, n); if (np < 2) { printError("Select atleast two m_columns / classes"); return; } double* yiBar = new double[np]; double* ziBar = new double[np]; double ziBarBar = 0; double* ni = new double[np]; for (int i = 0; i < np; i++) { yiBar[i] = 0; ziBar[i] = 0; ni[i] = 0; } double fValue; int df = 0; int totalRows = 0; QString* colNames = new QString[np]; if (!categoricalVariable && isNumericOrInteger(m_columns[0])) { totalRows = m_columns[0]->rowCount(); double value = 0; for (int j = 0; j < totalRows; j++) { int numberNaNCols = 0; for (int i = 0; i < np; i++) { value = m_columns[i]->valueAt(j); if (std::isnan(value)) { numberNaNCols++; continue; } yiBar[i] += value; ni[i]++; n++; } if (numberNaNCols == np) { totalRows = j; break; } } for (int i = 0; i < np; i++) { if (ni[i] > 0) yiBar[i] = yiBar[i] / ni[i]; else { printError("One of the selected m_columns is empty
" "or have choosen Independent Var.1 wrongly"); return; } } for (int j = 0; j < totalRows; j++) { for (int i = 0; i < np; i++) { value = m_columns[i]->valueAt(j); if (!(std::isnan(value))) ziBar[i] += fabs(value - yiBar[i]); } } for (int i = 0; i < np; i++) { ziBarBar += ziBar[i]; if (ni[i] > 0) ziBar[i] = ziBar[i] / ni[i]; } ziBarBar = ziBarBar / n; double numberatorValue = 0; double denominatorValue = 0; for (int j = 0; j < totalRows; j++) { for (int i = 0; i < np; i++) { value = m_columns[i]->valueAt(j); if (!(std::isnan(value))) { double zij = fabs(value - yiBar[i]); denominatorValue += gsl_pow_2( (zij - ziBar[i])); } } } if (denominatorValue <= 0) { printError( i18n("Denominator value is %1", denominatorValue)); return; } for (int i = 0; i < np; i++) { colNames[i] = m_columns[i]->name(); numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar)); } fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue); } else { QMap classnameToIndex; AbstractColumn::ColumnMode originalColMode = m_columns[0]->columnMode(); m_columns[0]->setColumnMode(AbstractColumn::Text); int partitionNumber = 1; QString name; double value; int classIndex; for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); if (std::isnan(value)) { n = j; break; } if (classnameToIndex[name] == 0) { classnameToIndex[name] = partitionNumber; partitionNumber++; } classIndex = classnameToIndex[name]-1; ni[classIndex]++; yiBar[classIndex] += value; } for (int i = 0; i < np; i++) { if (ni[i] > 0) yiBar[i] = yiBar[i] / ni[i]; else { printError("One of the selected m_columns is empty
" "or have choosen Independent Var.1 wrongly"); m_columns[0]->setColumnMode(originalColMode); return; } } for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); classIndex = classnameToIndex[name] - 1; ziBar[classIndex] += fabs(value - yiBar[classIndex]); } for (int i = 0; i < np; i++) { ziBarBar += ziBar[i]; ziBar[i] = ziBar[i] / ni[i]; } ziBarBar = ziBarBar / n; double numberatorValue = 0; double denominatorValue = 0; for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); classIndex = classnameToIndex[name] - 1; double zij = fabs(value - yiBar[classIndex]); denominatorValue += gsl_pow_2( (zij - ziBar[classIndex])); } for (int i = 0; i < np; i++) numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar)); if (denominatorValue <= 0) { printError( "number of data points is less or than equal to number of categorical variables"); m_columns[0]->setColumnMode(originalColMode); return; } fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue); QMapIterator i(classnameToIndex); while (i.hasNext()) { i.next(); colNames[i.value()-1] = m_columns[0]->name() + " " + i.key(); } m_columns[0]->setColumnMode(originalColMode); } df = n - np; // now making the stats table. 
int rowCount = np+1; int columnCount = 4; QVariant* rowMajor = new QVariant[rowCount*columnCount]; // header data; rowMajor[0] = ""; rowMajor[1] = "Ni"; rowMajor[2] = "yiBar"; rowMajor[3] = "ziBar"; // table data for (int row_i = 1; row_i < rowCount; row_i++) { rowMajor[row_i*columnCount] = colNames[row_i-1]; rowMajor[row_i*columnCount + 1] = ni[row_i-1]; rowMajor[row_i*columnCount + 2] = yiBar[row_i-1]; rowMajor[row_i*columnCount + 3] = ziBar[row_i-1]; } m_statsTable = getHtmlTable(rowCount, columnCount, rowMajor); delete[] rowMajor; delete[] yiBar; delete[] ziBar; delete[] ni; m_pValue.append(nsl_stats_fdist_p(fValue, static_cast(np-1), df)); printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue"); printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue"); printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue"); printLine(4, i18n("F Value is %1 ", round(fValue)), "green"); printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); if (m_pValue[0] <= m_significanceLevel) { printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); printLine(8, "Requirement for homogeneity is not met", "red"); } else { printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); printLine(8, "Requirement for homogeneity is met", "green"); } m_statisticValue.append(fValue); return; } //TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol; // TODO: check for correctness between: for TestZ with TailTwo // m_pValue.append(2*gsl_cdf_tdist_P(value, df) v/s // m_pValue.append(gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df); double HypothesisTest::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) 
{ switch (test) { case HypothesisTest::Test::Type::TTest: { switch (m_tailType) { case HypothesisTest::Test::Tail::Negative: { m_pValue.append(gsl_cdf_tdist_P(value, df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Positive: { value *= -1; m_pValue.append(gsl_cdf_tdist_P(value, df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Two: { m_pValue.append(2.*gsl_cdf_tdist_P(-fabs(value), df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); break; } } break; } case HypothesisTest::Test::Type::ZTest: { switch (m_tailType) { case HypothesisTest::Test::Tail::Negative: { m_pValue.append(gsl_cdf_gaussian_P(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Positive: { value *= -1; m_pValue.append(nsl_stats_tdist_p(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population 
mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Two: { m_pValue.append(2.*gsl_cdf_gaussian_P(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); break; } } break; } case HypothesisTest::Test::Type::Anova: case HypothesisTest::Test::Type::NoneType: break; } if (m_pValue[0] > 1) return 1; return m_pValue[0]; } // Virtual functions QWidget* HypothesisTest::view() const { if (!m_partView) { m_view = new HypothesisTestView(const_cast(this)); m_partView = m_view; } return m_partView; } diff --git a/src/backend/generalTest/HypothesisTest.h b/src/backend/generalTest/HypothesisTest.h index 95ec19205..ef00b8906 100644 --- a/src/backend/generalTest/HypothesisTest.h +++ b/src/backend/generalTest/HypothesisTest.h @@ -1,100 +1,100 @@ /*************************************************************************** File : HypothesisTest.h Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the *
 * GNU General Public License for more details. *
 * *
 * You should have received a copy of the GNU General Public License *
 * along with this program; if not, write to the Free Software *
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
 * Boston, MA 02110-1301 USA *
 * *
 ***************************************************************************/
#ifndef HYPOTHESISTEST_H
#define HYPOTHESISTEST_H

#include "backend/core/AbstractPart.h"
#include "GeneralTest.h"
#include "backend/lib/macros.h"

class HypothesisTestView;
class Spreadsheet;
class QString;
class Column;
class QVBoxLayout;
class QLabel;

/*!
 * Hypothesis tests (t-test, z-test, one-way and two-way ANOVA, Levene's test) built on
 * GeneralTest. Results are collected in m_pValue and m_statisticValue.
 */
class HypothesisTest : public GeneralTest {
	Q_OBJECT

public:
	explicit HypothesisTest(const QString& name);
	~HypothesisTest() override;

	/*! Describes which test to run: its type, its sub type and the tail. */
	struct Test {
		// the enumerators are distinct bits (1 << k)
		enum Type {
			NoneType = 0,
			TTest = 1 << 0,
			ZTest = 1 << 1,
			Anova = 1 << 2
		};
		enum SubType {
			NoneSubType = 0,
			TwoSampleIndependent = 1 << 0,
			TwoSamplePaired = 1 << 1,
			OneSample = 1 << 2,
			OneWay = 1 << 3,
			TwoWay = 1 << 4
		};
		enum Tail {Positive, Negative, Two};
		Type type = NoneType;
		SubType subtype = NoneSubType;
		// NOTE(review): unlike type and subtype, tail has no default initializer
		Tail tail;
	};

	void setPopulationMean(QVariant populationMean);
	void setSignificanceLevel(QVariant alpha);

	void performTest(Test m_test, bool categoricalVariable = true, bool equalVariance = true);
	void performLeveneTest(bool categoricalVariable);

- QList statisticValue();
- QList pValue();
+ QList& statisticValue();
+ QList& pValue();

	QWidget* view() const override;

private:
	void performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable = false, bool equalVariance = true);
	void performTwoSamplePairedTest(HypothesisTest::Test::Type test);
	void performOneSampleTest(HypothesisTest::Test::Type test);
	void performOneWayAnova();
	void performTwoWayAnova();
	void m_performLeveneTest(bool categoricalVariable);

	double getPValue(const HypothesisTest::Test::Type& test, double& value,
	                 const QString& col1Name, const QString& col2name,
	                 const double mean, const double sp, const int df);

	double m_populationMean;
	double m_significanceLevel;
	HypothesisTest::Test::Tail m_tailType;
	// p-values and test statistics appended by the perform*() methods
	QList m_pValue;
	QList m_statisticValue;
};

#endif // HypothesisTest_H
diff --git a/src/kdefrontend/generalTest/GeneralTestView.cpp b/src/kdefrontend/generalTest/GeneralTestView.cpp
index 6a11b4e9c..fb98793e8 100644
--- a/src/kdefrontend/generalTest/GeneralTestView.cpp
+++ b/src/kdefrontend/generalTest/GeneralTestView.cpp
@@ -1,197 +1,197 @@
/***************************************************************************
 File : GeneralTestView.cpp
 Project : LabPlot
 Description : View class for Hypothesis Tests' Table
 --------------------------------------------------------------------
 Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
 ***************************************************************************/
/***************************************************************************
 * *
 * This program is free software; you can redistribute it and/or modify *
 * it under the terms of the GNU General Public License as published by *
 * the Free Software Foundation; either version 2 of the License, or *
 * (at your option) any later version. *
 * *
 * This program is distributed in the hope that it will be useful, *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
 * GNU General Public License for more details. *
 * *
 * You should have received a copy of the GNU General Public License *
 * along with this program; if not, write to the Free Software *
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
 * Boston, MA 02110-1301 USA *
 * *
 ***************************************************************************/
#include "GeneralTestView.h"
#include "backend/generalTest/GeneralTest.h"
#include "backend/lib/macros.h"
#include "backend/lib/trace.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*!
	\class GeneralTestView
	\brief View class for Hypothesis Test

	\ingroup kdefrontend
 */

// Builds the widget: test name label, read-only stats table and summary widget are stacked
// vertically, followed by a stretch; init() then wires the signals.
GeneralTestView::GeneralTestView(GeneralTest* GeneralTest) : QWidget(),
	m_generalTest(GeneralTest),
	m_testName(new QLabel()),
	m_statsTable(new QTextEdit()),
	m_summaryResults(new QWidget()) {

	m_statsTable->setReadOnly(true);

	auto* layout = new QVBoxLayout(this);
	layout->addWidget(m_testName);
	layout->addWidget(m_statsTable);
	layout->addWidget(m_summaryResults);
	layout->addStretch(1);
	init();
}

GeneralTestView::~GeneralTestView() = default;

// Calls the (currently empty) action/menu hooks, enables mouse tracking on the stats table
// and connects the test's changed() signal and the table's cursor movement to this view.
void GeneralTestView::init() {
	initActions();
	initMenus();

	m_statsTable->setMouseTracking(true);
	// m_summaryResults->setStyleSheet("background-color:white; border: 0px; margin: 0px; padding 0px;qproperty-frame: false;");
	connect(m_generalTest, &GeneralTest::changed, this, &GeneralTestView::changed);
	connect(m_statsTable, &QTextEdit::cursorPositionChanged, this, &GeneralTestView::cursorPositionChanged);
}

// No actions are created yet.
void GeneralTestView::initActions() {
}

// No menus are created yet.
void GeneralTestView::initMenus() {
}

// Clears each of the RESULTLINESCOUNT result lines.
void GeneralTestView::clearResult() {
	for (int i = 0; i < RESULTLINESCOUNT; i++)
		m_resultLine[i]->clear();
}

// No actions to connect yet.
void GeneralTestView::connectActions() {
}

// The tool bar is left untouched.
void GeneralTestView::fillToolBar(QToolBar* toolBar) {
	Q_UNUSED(toolBar);
}

/*!
 * Populates the menu \c menu with the pivot table and pivot table view relevant actions.
* The menu is used * - as the context menu in PivotTableView * - as the "pivot table menu" in the main menu-bar (called form MainWin) * - as a part of the pivot table context menu in project explorer */ void GeneralTestView::createContextMenu(QMenu* menu) { Q_ASSERT(menu); } bool GeneralTestView::exportView() { return true; } bool GeneralTestView::printView() { QPrinter printer; auto* dlg = new QPrintDialog(&printer, this); dlg->setWindowTitle(i18nc("@title:window", "Print Spreadsheet")); bool ret; if ((ret = dlg->exec()) == QDialog::Accepted) { print(&printer); } delete dlg; return ret; } bool GeneralTestView::printPreview() { QPrintPreviewDialog* dlg = new QPrintPreviewDialog(this); connect(dlg, &QPrintPreviewDialog::paintRequested, this, &GeneralTestView::print); return dlg->exec(); } /*! prints the complete spreadsheet to \c printer. */ void GeneralTestView::print(QPrinter* printer) const { WAIT_CURSOR; QPainter painter (printer); RESET_CURSOR; } void GeneralTestView::changed() { m_testName->setText(m_generalTest->testName()); if (m_generalTest->statsTable().isEmpty()) m_statsTable->hide(); else { m_statsTable->setHtml(m_generalTest->statsTable()); m_statsTable->show(); } m_summaryResults->setLayout(m_generalTest->summaryLayout()); } void GeneralTestView::cursorPositionChanged() { QTextCursor cursor = m_statsTable->textCursor(); - cursor.select(QTextCursor::WordUnderCursor); + cursor.select(QTextCursor::LineUnderCursor); QMap tooltips = m_generalTest->tooltips(); if (!cursor.selectedText().isEmpty()) QToolTip::showText(QCursor::pos(), QString("%1") .arg(tooltips.value(cursor.selectedText()))); else QToolTip::hideText(); } void GeneralTestView::exportToFile(const QString& path, const bool exportHeader, const QString& separator, QLocale::Language language) const { Q_UNUSED(exportHeader); Q_UNUSED(separator); Q_UNUSED(language); QFile file(path); if (!file.open(QFile::WriteOnly | QFile::Truncate)) return; PERFTRACE("export pivot table to file"); } void 
GeneralTestView::exportToLaTeX(const QString & path, const bool exportHeaders, const bool gridLines, const bool captions, const bool latexHeaders, const bool skipEmptyRows, const bool exportEntire) const { Q_UNUSED(exportHeaders); Q_UNUSED(gridLines); Q_UNUSED(captions); Q_UNUSED(latexHeaders); Q_UNUSED(skipEmptyRows); Q_UNUSED(exportEntire); QFile file(path); if (!file.open(QFile::WriteOnly | QFile::Truncate)) return; PERFTRACE("export pivot table to latex"); }