diff --git a/src/backend/generalTest/CorrelationCoefficient.cpp b/src/backend/generalTest/CorrelationCoefficient.cpp index 43b0c0533..c71a99348 100644 --- a/src/backend/generalTest/CorrelationCoefficient.cpp +++ b/src/backend/generalTest/CorrelationCoefficient.cpp @@ -1,433 +1,437 @@ /*************************************************************************** File : CorrelationCoefficient.cpp Project : LabPlot Description : Finding Correlation Coefficient on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "CorrelationCoefficient.h" #include "GeneralTest.h" #include "kdefrontend/generalTest/CorrelationCoefficientView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" #include #include #include #include #include #include #include #include #include #include #include #include extern "C" { #include "backend/nsl/nsl_stats.h" } CorrelationCoefficient::CorrelationCoefficient(const QString &name) : GeneralTest (name, AspectType::CorrelationCoefficient) { } CorrelationCoefficient::~CorrelationCoefficient() { } void CorrelationCoefficient::performTest(Test test, bool categoricalVariable) { //QDEBUG("in perform test"); m_statsTable = ""; m_tooltips.clear(); m_correlationValue = 0; m_statisticValue.clear(); m_pValue.clear(); for (int i = 0; i < RESULTLINESCOUNT; i++) m_resultLine[i]->clear(); switch (test) { case CorrelationCoefficient::Test::Pearson: { m_currTestName = "

" + i18n("Pearson's r Correlation Test") + "

"; performPearson(categoricalVariable); break; } case CorrelationCoefficient::Test::Kendall: m_currTestName = "

" + i18n("Kendall's Rank Correlation Test") + "

"; performKendall(); break; case CorrelationCoefficient::Test::Spearman: { m_currTestName = "

" + i18n("Spearman Correlation Coefficient Test") + "

"; performSpearman(); break; } } emit changed(); } double CorrelationCoefficient::correlationValue() const{ return m_correlationValue; } QList CorrelationCoefficient::statisticValue() const{ return m_statisticValue; } QList CorrelationCoefficient::pValue() const{ return m_pValue; } /*************************************************************************************************************************** * Private Implementations * ************************************************************************************************************************/ /*********************************************Pearson r ******************************************************************/ //Formulaes are taken from https://www.statisticssolutions.com/correlation-pearson-kendall-spearman/ // variables: // N = total number of observations // sumColx = sum of values in colx // sumSqColx = sum of square of values in colx // sumColxColy = sum of product of values in colx and coly //TODO: support for col1 is categorical. 
//TODO: add automatic test //TODO: add tooltip for correlation value result //TODO: find p value /* Computes Pearson's r for the two selected columns from the running sums N, sum(x), sum(y), sum(x^2), sum(y^2) and sum(x*y), prints those sums as a table and stores the result in m_correlationValue. */ void CorrelationCoefficient::performPearson(bool categoricalVariable) { //QDEBUG("in pearson"); if (m_columns.count() != 2) { printError("Select only 2 columns "); return; } if (categoricalVariable) { printLine(1, "currently categorical variable not supported", "blue"); return; } QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); if (!isNumericOrInteger(m_columns[1])) { printError("Column " + col2Name + " should contain only numeric or interger values"); return; /* stop here like every other validation failure; without this the sums below were still computed and a result line was printed next to the error */ } int N = findCount(m_columns[0]); if (N != findCount(m_columns[1])) { printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal"); return; } double sumCol1 = findSum(m_columns[0], N); double sumCol2 = findSum(m_columns[1], N); double sumSqCol1 = findSumSq(m_columns[0], N); double sumSqCol2 = findSumSq(m_columns[1], N); double sumCol12 = 0; for (int i = 0; i < N; i++) sumCol12 += m_columns[0]->valueAt(i) * m_columns[1]->valueAt(i); // printing table; // cell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader; QList rowMajor; int level = 0; // horizontal header QString sigma = UTF8_QSTRING("Σ"); rowMajor.append(new Cell("", level, true)); rowMajor.append(new Cell("N", level, true, "Total Number of Observations")); rowMajor.append(new Cell(QString(sigma + "Scores"), level, true, "Sum of Scores in each column")); rowMajor.append(new Cell(QString(sigma + "Scores2"), level, true, "Sum of Squares of scores in each column")); rowMajor.append(new Cell(QString(sigma + "(" + UTF8_QSTRING("∏") + "Scores)"), level, true, "Sum of product of scores of both columns")); //data with vertical header. 
level++; rowMajor.append(new Cell(col1Name, level, true)); rowMajor.append(new Cell(N, level)); rowMajor.append(new Cell(sumCol1, level)); rowMajor.append(new Cell(sumSqCol1, level)); rowMajor.append(new Cell(sumCol12, level, false, "", 2, 1)); level++; rowMajor.append(new Cell(col2Name, level, true)); rowMajor.append(new Cell(N, level)); rowMajor.append(new Cell(sumCol2, level)); rowMajor.append(new Cell(sumSqCol2, level)); m_statsTable += getHtmlTable3(rowMajor); m_correlationValue = (N * sumCol12 - sumCol1*sumCol2) / sqrt((N * sumSqCol1 - gsl_pow_2(sumCol1)) * (N * sumSqCol2 - gsl_pow_2(sumCol2))); printLine(0, QString("Correlation Value is %1").arg(round(m_correlationValue)), "green"); } /***********************************************Kendall ******************************************************************/ // used knight algorithm for fast performance O(nlogn) rather than O(n^2) // http://adereth.github.io/blog/2013/10/30/efficiently-computing-kendalls-tau/ // TODO: Change date format type to original for numeric type; // TODO: add tooltips. // TODO: Compute tauB for ties. // TODO: find P Value from Z Value void CorrelationCoefficient::performKendall() { + QDEBUG("in perform kendall") + if (m_columns.count() != 2) { printError("Select only 2 columns "); return; } QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); int N = findCount(m_columns[0]); if (N != findCount(m_columns[1])) { printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal"); + QDEBUG("unequal number of rows") return; } int col2Ranks[N]; if (isNumericOrInteger(m_columns[0]) || isNumericOrInteger(m_columns[1])) { if (isNumericOrInteger(m_columns[0]) && isNumericOrInteger(m_columns[1])) { for (int i = 0; i < N; i++) col2Ranks[int(m_columns[0]->valueAt(i)) - 1] = int(m_columns[1]->valueAt(i)); } else { printError(QString("Ranking System should be same for both Column: %1 and Column: %2
" "Hint: Check for data types of columns").arg(col1Name).arg(col2Name)); + QDEBUG("ranking system not same") return; } } else { AbstractColumn::ColumnMode origCol1Mode = m_columns[0]->columnMode(); AbstractColumn::ColumnMode origCol2Mode = m_columns[1]->columnMode(); m_columns[0]->setColumnMode(AbstractColumn::Text); m_columns[1]->setColumnMode(AbstractColumn::Text); QMap ValueToRank; for (int i = 0; i < N; i++) { if (ValueToRank[m_columns[0]->textAt(i)] != 0) { printError("Currently ties are not supported"); m_columns[0]->setColumnMode(origCol1Mode); m_columns[1]->setColumnMode(origCol2Mode); return; } ValueToRank[m_columns[0]->textAt(i)] = i + 1; } for (int i = 0; i < N; i++) col2Ranks[i] = ValueToRank[m_columns[1]->textAt(i)]; m_columns[0]->setColumnMode(origCol1Mode); m_columns[1]->setColumnMode(origCol2Mode); } int nPossiblePairs = (N * (N - 1)) / 2; int nDiscordant = findDiscordants(col2Ranks, 0, N - 1); int nCorcordant = nPossiblePairs - nDiscordant; - double m_correlationValue = double(nCorcordant - nDiscordant) / nPossiblePairs; + m_correlationValue = double(nCorcordant - nDiscordant) / nPossiblePairs; /* Z = 3 (C - D) / sqrt(N (N-1) (2N+5) / 2); evaluated in double because the product N*(N-1)*(2N+5) no longer fits into an int once N exceeds roughly 1000 rows */ m_statisticValue.append((3. * (nCorcordant - nDiscordant)) / sqrt(double(N) * (N - 1) * (2. * N + 5) / 2.)); printLine(0 , QString("Number of Discordants are %1").arg(nDiscordant), "green"); printLine(1 , QString("Number of Concordant are %1").arg(nCorcordant), "green"); printLine(2 , QString("Tau a is %1").arg(round(m_correlationValue)), "green"); printLine(3 , QString("Z Value is %1").arg(round(m_statisticValue[0])), "green"); return; } /***********************************************Spearman ******************************************************************/ // All formulaes and symbols are taken from : https://www.statisticshowto.datasciencecentral.com/spearman-rank-correlation-definition-calculate/ void CorrelationCoefficient::performSpearman() { if (m_columns.count() != 2) { printError("Select only 2 columns "); return; } QString col1Name = m_columns[0]->name(); 
QString col2Name = m_columns[1]->name(); int N = findCount(m_columns[0]); if (N != findCount(m_columns[1])) { printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal"); return; } QMap col1Ranks; convertToRanks(m_columns[0], N, col1Ranks); QMap col2Ranks; convertToRanks(m_columns[1], N, col2Ranks); double ranksCol1Mean = 0; double ranksCol2Mean = 0; // QString ranks1 = ""; // QString ranks2 = ""; for (int i = 0; i < N; i++) { ranksCol1Mean += col1Ranks[int(m_columns[0]->valueAt(i))]; ranksCol2Mean += col2Ranks[int(m_columns[1]->valueAt(i))]; // ranks1 += ", " + QString::number(col1Ranks[m_columns[0]->valueAt(i)]); // ranks2 += ", " + QString::number(col2Ranks[m_columns[1]->valueAt(i)]); } ranksCol1Mean = ranksCol1Mean / N; ranksCol2Mean = ranksCol2Mean / N; //QDEBUG("ranks 1 and ranks2 are " ); //QDEBUG(ranks1); //QDEBUG(ranks2); //QDEBUG("Mean ranks are " << ranksCol1Mean << ranksCol2Mean); double s12 = 0; double s1 = 0; double s2 = 0; for (int i = 0; i < N; i++) { double centeredRank_1 = col1Ranks[int(m_columns[0]->valueAt(i))] - ranksCol1Mean; double centeredRank_2 = col2Ranks[int(m_columns[1]->valueAt(i))] - ranksCol2Mean; s12 += centeredRank_1 * centeredRank_2; s1 += gsl_pow_2(centeredRank_1); s2 += gsl_pow_2(centeredRank_2); } s12 = s12 / N; s1 = s1 / N; s2 = s2 / N; //QDEBUG("s12, s1, s2 are " << s12 << " " << s1 << " " << s2); m_correlationValue = s12 / std::sqrt(s1 * s2); printLine(0, QString("Spearman Rank Correlation value is %1").arg(m_correlationValue), "green"); } /***********************************************Helper Functions******************************************************************/ int CorrelationCoefficient::findDiscordants(int *ranks, int start, int end) { if (start >= end) return 0; int mid = (start + end) / 2; int leftDiscordants = findDiscordants(ranks, start, mid); int rightDiscordants = findDiscordants(ranks, mid + 1, end); int len = end - start + 1; int leftLen = mid - start + 1; int 
rightLen = end - mid; int leftLenRemain = leftLen; int leftRanks[leftLen]; int rightRanks[rightLen]; for (int i = 0; i < leftLen; i++) leftRanks[i] = ranks[start + i]; for (int i = leftLen; i < leftLen + rightLen; i++) rightRanks[i - leftLen] = ranks[start + i]; int mergeDiscordants = 0; int i = 0, j = 0, k =0; /* merge step: whenever an element of the right half is placed before the remaining elements of the left half, each of those remaining left elements forms one discordant pair with it */ while (i < len) { if (j >= leftLen) { ranks[start + i] = rightRanks[k]; k++; } else if (k >= rightLen) { ranks[start + i] = leftRanks[j]; j++; } else if (leftRanks[j] <= rightRanks[k]) { /* equal ranks are taken from the left half and not counted as discordant; previously an equal pair matched no branch, so the output slot was skipped and elements were lost from the merge */ ranks[start + i] = leftRanks[j]; j++; leftLenRemain--; } else { ranks[start + i] = rightRanks[k]; mergeDiscordants += leftLenRemain; k++; } i++; } return leftDiscordants + rightDiscordants + mergeDiscordants; } void CorrelationCoefficient::convertToRanks(const Column* col, int N, QMap &ranks) { if (!isNumericOrInteger(col)) return; //QDEBUG("in convert to ranks"); double* sortedList = new double[N]; for (int i = 0; i < N; i++) sortedList[i] = col->valueAt(i); std::sort(sortedList, sortedList + N, std::greater()); // QString debug_sortedList = ""; ranks.clear(); for (int i = 0; i < N; i++) { ranks[sortedList[i]] = i + 1; // debug_sortedList += ", " + QString::number(sortedList[i]); } //QDEBUG("sorted list is " << debug_sortedList); delete[] sortedList; } void CorrelationCoefficient::convertToRanks(const Column* col, QMap &ranks) { convertToRanks(col, findCount(col), ranks); } /***********************************************Virtual Functions******************************************************************/ QWidget* CorrelationCoefficient::view() const { if (!m_partView) { m_view = new CorrelationCoefficientView(const_cast(this)); m_partView = m_view; } return m_partView; } diff --git a/src/backend/generalTest/CorrelationCoefficient.h b/src/backend/generalTest/CorrelationCoefficient.h index 24af099db..98adcde02 100644 --- a/src/backend/generalTest/CorrelationCoefficient.h +++ b/src/backend/generalTest/CorrelationCoefficient.h @@ -1,78 +1,71 @@ 
/*************************************************************************** File : CorrelationCoefficient.h Project : LabPlot Description : Finding Correlation Coefficient on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef CORRELATIONCOEFFICIENT_H #define CORRELATIONCOEFFICIENT_H -#include "backend/core/AbstractPart.h" #include "GeneralTest.h" -#include "backend/lib/macros.h" class CorrelationCoefficientView; -class Spreadsheet; -class QString; -class Column; -class QVBoxLayout; -class QLabel; class CorrelationCoefficient : public GeneralTest { Q_OBJECT public: explicit CorrelationCoefficient(const QString& name); ~CorrelationCoefficient() override; enum Test{ Pearson, Kendall, Spearman }; double correlationValue() const; QList statisticValue() const; QList pValue() const; QWidget* view() const override; - void performTest(Test m_test, bool categoricalVariable = true); + void performTest(Test m_test, bool 
categoricalVariable = false); private: void performPearson(bool categoricalVariable); void performKendall(); void performSpearman(); int findDiscordants(int* ranks, int start, int end); void convertToRanks(const Column* col, int N, QMap &ranks); void convertToRanks(const Column* col, QMap &ranks); double m_correlationValue; QList m_statisticValue; QList m_pValue; }; #endif // CORRELATIONCOEFFICIENT_H diff --git a/src/backend/generalTest/HypothesisTest.cpp b/src/backend/generalTest/HypothesisTest.cpp index 16057794b..4ab760228 100644 --- a/src/backend/generalTest/HypothesisTest.cpp +++ b/src/backend/generalTest/HypothesisTest.cpp @@ -1,1143 +1,1143 @@ /*************************************************************************** File : HypothesisTest.cpp Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "HypothesisTest.h" #include "kdefrontend/generalTest/HypothesisTestView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" #include #include #include #include #include #include #include #include #include #include #include extern "C" { #include "backend/nsl/nsl_stats.h" } HypothesisTest::HypothesisTest(const QString &name) : GeneralTest (name, AspectType::HypothesisTest) { } HypothesisTest::~HypothesisTest() { } /* Stores the population mean that the one-sample test compares against. The parameter has the same name as the member, so the member must be addressed through this->; the unqualified assignment only overwrote the parameter and left the member unset. */ void HypothesisTest::setPopulationMean(QVariant m_populationMean) { this->m_populationMean = m_populationMean.toDouble(); } void HypothesisTest::setSignificanceLevel(QVariant alpha) { m_significanceLevel = alpha.toDouble(); } void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) { m_tailType = test.tail; m_pValue.clear(); m_statisticValue.clear(); m_statsTable = ""; m_tooltips.clear(); for (int i = 0; i < RESULTLINESCOUNT; i++) m_resultLine[i]->clear(); switch (test.subtype) { case HypothesisTest::Test::SubType::TwoSampleIndependent: { m_currTestName = "

" + i18n("Two Sample Independent Test") + "

"; performTwoSampleIndependentTest(test.type, categoricalVariable, equalVariance); break; } case HypothesisTest::Test::SubType::TwoSamplePaired: m_currTestName = "

" + i18n("Two Sample Paired Test") + "

"; performTwoSamplePairedTest(test.type); break; case HypothesisTest::Test::SubType::OneSample: { m_currTestName = "

" + i18n("One Sample Test") + "

"; performOneSampleTest(test.type); break; } case HypothesisTest::Test::SubType::OneWay: { m_currTestName = "

" + i18n("One Way Anova") + "

"; performOneWayAnova(); break; } case HypothesisTest::Test::SubType::TwoWay: { m_currTestName = "

" + i18n("Two Way Anova") + "

"; performTwoWayAnova(); break; } case HypothesisTest::Test::SubType::NoneSubType: break; } emit changed(); } void HypothesisTest::performLeveneTest(bool categoricalVariable) { m_pValue.clear(); m_statisticValue.clear(); m_statsTable = ""; m_tooltips.clear(); for (int i = 0; i < RESULTLINESCOUNT; i++) m_resultLine[i]->clear(); m_currTestName = "

" + i18n("Levene Test for Equality of Variance") + "

"; m_performLeveneTest(categoricalVariable); emit changed(); } QList& HypothesisTest::statisticValue(){ return m_statisticValue; } QList& HypothesisTest::pValue(){ return m_pValue; } /****************************************************************************** * Private Implementations * ****************************************************************************/ //TODO: backend of z test; //TODO: add tooltip to tables. (currently it is not possible to use with QTextDocument); //TODO: use https://www.gnu.org/software/gsl/doc/html/statistics.html for basic statistic calculations /**************************Two Sample Independent *************************************/ void HypothesisTest::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) { if (m_columns.size() != 2) { printError("Inappropriate number of m_columns selected"); return; } int n[2]; double sum[2], mean[2], std[2]; QString col1Name = m_columns[0]->name(); QString col2Name = m_columns[1]->name(); if (!categoricalVariable && isNumericOrInteger(m_columns[0])) { for (int i = 0; i < 2; i++) { findStats(m_columns[i], n[i], sum[i], mean[i], std[i]); if (n[i] == 0) { - printError("Atleast two values should be there in every column"); + printError("At least two values should be there in every column"); return; } - if (std[i] <= 0) { - printError(i18n("Standard Deviation of atleast one column is equal to 0: last column is: %1", m_columns[i]->name())); + if (gsl_fcmp(std[i], 0., 1.e-16)) { + printError(i18n("Standard Deviation of at least one column is equal to 0: last column is: %1", m_columns[i]->name())); return; } } } else { QMap colName; QString baseColName; int np; int totalRows; countPartitions(m_columns[0], np, totalRows); if (np != 2) { printError( i18n("Number of Categorical Variable in Column %1 is not equal to 2", m_columns[0]->name())); return; } if (isNumericOrInteger(m_columns[0])) baseColName = m_columns[0]->name(); ErrorType errorCode = 
findStatsCategorical(m_columns[0], m_columns[1], n, sum, mean, std, colName, np, totalRows); switch (errorCode) { case ErrorUnqualSize: { printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name())); return; } case ErrorEmptyColumn: { printError("At least one of selected column is empty"); return; } case NoError: break; } QMapIterator i(colName); while (i.hasNext()) { i.next(); if (i.value() == 1) col1Name = baseColName + " " + i.key(); else col2Name = baseColName + " " + i.key(); } } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", col1Name, n[0], sum[0], mean[0], std[0], col2Name, n[1], sum[1], mean[1], std[1] }; m_statsTable = getHtmlTable(3, 5, rowMajor); for (int i = 0; i < 2; i++) { if (n[i] == 0) { - printError("Atleast two values should be there in every column"); + printError("At least two values should be there in every column"); return; } - if (std[i] <= 0) { - printError( i18n("Standard Deviation of atleast one column is equal to 0: last column is: %1", m_columns[i]->name())); + if (gsl_fcmp(std[i], 0., 1.e-16)) { + printError( i18n("Standard Deviation of at least one column is equal to 0: last column is: %1", m_columns[i]->name())); return; } } QString testName; int df = 0; double sp = 0; switch (test) { case HypothesisTest::Test::Type::TTest: { testName = "T"; if (equalVariance) { df = n[0] + n[1] - 2; sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1]) ) / df ); m_statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1]))); printLine(9, "Assumption: Equal Variance b/w both population means"); } else { double temp_val; temp_val = gsl_pow_2( gsl_pow_2(std[0]) / n[0] + gsl_pow_2(std[1]) / n[1]); temp_val = temp_val / ( (gsl_pow_2( (gsl_pow_2(std[0]) / n[0]) ) / (n[0]-1)) + (gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1))); df = qRound(temp_val); m_statisticValue.append((mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) + (gsl_pow_2(std[1])/n[1])))); 
printLine(9, "Assumption: UnEqual Variance b/w both population means"); } printLine(8, "Assumption: Both Populations approximately follow normal distribution"); break; } case HypothesisTest::Test::Type::ZTest: { testName = "Z"; sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df); m_statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1]))); // m_pValue.append(gsl_cdf_gaussian_P(m_statisticValue, sp)); break; } case HypothesisTest::Test::Type::Anova: case HypothesisTest::Test::Type::NoneType: break; } m_currTestName = "

" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "

"; m_pValue.append(getPValue(test, m_statisticValue[0], col1Name, col2Name, (mean[0] - mean[1]), sp, df)); printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue"); printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green"); printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName)); printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate")); if (m_pValue[0] <= m_significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(m_significanceLevel))); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /********************************Two Sample Paired ***************************************/ void HypothesisTest::performTwoSamplePairedTest(HypothesisTest::Test::Type test) { if (m_columns.size() != 2) { printError("Inappropriate number of m_columns selected"); return; } for (int i = 0; i < 2; i++) { /* check each of the two columns; the loop used to test column 0 twice */ if ( !isNumericOrInteger(m_columns[i])) { printError("select only m_columns with numbers"); return; } } int n; double sum, mean, std; ErrorType errorCode = findStatsPaired(m_columns[0], m_columns[1], n, sum, mean, std); switch (errorCode) { case ErrorUnqualSize: { printError("both m_columns are having different sizes"); return; } case ErrorEmptyColumn: { printError("m_columns are empty"); return; } case NoError: break; } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", "difference", n, sum, mean, std }; m_statsTable = getHtmlTable(2, 5, rowMajor); - if (std <= 0) { + if (gsl_fcmp(std, 0., 1.e-16) == 0) { /* gsl_fcmp() returns 0 when the values are approximately equal */ printError("Standard deviation of the difference is 0"); return; } QString testName; int df = 0; switch (test) { case HypothesisTest::Test::Type::TTest: { /* performTest() clears m_statisticValue before dispatching, so the value has to be appended; index 0 does not exist yet */ m_statisticValue.append(mean / (std / qSqrt(n))); df = n - 1; 
testName = "T"; printLine(6, i18n("Degree of Freedom is %1name(), i18n("%1", m_populationMean), mean, std, df)); m_currTestName = "

" + i18n("One Sample %1 Test for %2 vs %3", testName, m_columns[0]->name(), m_columns[1]->name()) + "

"; printLine(2, i18n("Significance level is %1 ", round(m_significanceLevel)), "blue"); printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green"); printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green"); if (m_pValue[0] <= m_significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /******************************** One Sample ***************************************/ void HypothesisTest::performOneSampleTest(HypothesisTest::Test::Type test) { if (m_columns.size() != 1) { printError("Inappropriate number of m_columns selected"); return; } if ( !isNumericOrInteger(m_columns[0])) { printError("select only m_columns with numbers"); return; } int n; double sum, mean, std; ErrorType errorCode = findStats(m_columns[0], n, sum, mean, std); switch (errorCode) { case ErrorEmptyColumn: { printError("column is empty"); return; } case NoError: break; case ErrorUnqualSize: { return; } } QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std", m_columns[0]->name(), n, sum, mean, std }; m_statsTable = getHtmlTable(2, 5, rowMajor); - if (std <= 0) { + if (gsl_fcmp(std, 0., 1.e-16) == 0) { /* gsl_fcmp() returns 0 when the values are approximately equal; the bare call rejected every column with a non-zero deviation and let a zero deviation through to the division below */ printError("Standard deviation is 0"); return; } QString testName; int df = 0; switch (test) { case HypothesisTest::Test::Type::TTest: { testName = "T"; m_statisticValue.append((mean - m_populationMean) / (std / qSqrt(n))); df = n - 1; printLine(6, i18n("Degree of Freedom is %1", df), "blue"); break; } case HypothesisTest::Test::Type::ZTest: { testName = "Z"; df = 0; m_statisticValue.append((mean - m_populationMean) / (std / qSqrt(n))); break; } case HypothesisTest::Test::Type::Anova: case HypothesisTest::Test::Type::NoneType: break; } m_pValue.append(getPValue(test, m_statisticValue[0], m_columns[0]->name(), i18n("%1",m_populationMean), mean - m_populationMean, std, df)); m_currTestName = "

" + i18n("One Sample %1 Test for %2", testName, m_columns[0]->name()) + "

"; printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue"); printLine(4, i18n("%1 Value is %2", testName, round(m_statisticValue[0])), "green"); printLine(5, i18n("P Value is %1", m_pValue[0]), "green"); if (m_pValue[0] <= m_significanceLevel) printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); else printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /*************************************One Way Anova***************************************/ // all standard variables and formulas are taken from this wikipedia page: // https://en.wikipedia.org/wiki/One-way_analysis_of_variance // b stands for b/w groups // w stands for within groups // np is number of partition i.e., number of classes void HypothesisTest::performOneWayAnova() { int np, totalRows; countPartitions(m_columns[0], np, totalRows); int* ni = new int[np]; double* sum = new double[np]; double* mean = new double[np]; double* std = new double[np]; QString* colNames = new QString[np]; QMap classnameToIndex; QString baseColName; if (isNumericOrInteger(m_columns[0])) baseColName = m_columns[0]->name(); findStatsCategorical(m_columns[0], m_columns[1], ni, sum, mean, std, classnameToIndex, np, totalRows); double yBar = 0; // overall mean double sB = 0; // sum of squares of (mean - overall_mean) between the groups int fB = 0; // degree of freedom between the groups double msB = 0; // mean sum of squares between the groups double sW = 0; // sum of squares of (value - mean of group) within the groups int fW = 0; // degree of freedom within the group double msW = 0; // mean sum of squares within the groups // now finding mean of each group; for (int i = 0; i < np; i++) yBar += mean[i]; yBar = yBar / np; for (int i = 0; i < np; i++) { sB += ni[i] * gsl_pow_2( ( mean[i] - yBar)); if (ni[i] > 1) sW += gsl_pow_2( std[i])*(ni[i] - 1); else sW += gsl_pow_2( std[i]); fW += ni[i] - 1; } fB = np - 1; msB = 
sB / fB; msW = sW / fW; m_statisticValue.append(msB / msW); m_pValue.append(nsl_stats_fdist_p(m_statisticValue[0], static_cast(np-1), fW)); QMapIterator i(classnameToIndex); while (i.hasNext()) { i.next(); colNames[i.value()-1] = baseColName + " " + i.key(); } // now printing the statistics and result; int rowCount = np + 1, columnCount = 5; QVariant* rowMajor = new QVariant[rowCount*columnCount]; // header data; rowMajor[0] = ""; rowMajor[1] = "Ni"; rowMajor[2] = "Sum"; rowMajor[3] = "Mean"; rowMajor[4] = "Std"; // table data for (int row_i = 1; row_i < rowCount ; row_i++) { rowMajor[row_i*columnCount] = colNames[row_i - 1]; rowMajor[row_i*columnCount + 1] = ni[row_i - 1]; rowMajor[row_i*columnCount + 2] = sum[row_i - 1]; rowMajor[row_i*columnCount + 3] = mean[row_i - 1]; rowMajor[row_i*columnCount + 4] = std[row_i - 1]; } m_statsTable = "

" + i18n("Group Summary Statistics") + "

"; m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor); m_statsTable += getLine(""); m_statsTable += getLine(""); m_statsTable += "

" + i18n("Grand Summary Statistics") + "

"; m_statsTable += getLine(""); m_statsTable += getLine(i18n("Overall Mean is %1", round(yBar))); rowCount = 4; columnCount = 3; /* start the second table from a fresh buffer of the right size: rowMajor->clear() only reset element 0, and the first buffer holds (np + 1) * 5 entries, which is fewer than the 12 written below when np == 1 */ delete[] rowMajor; rowMajor = new QVariant[rowCount * columnCount]; rowMajor[0] = ""; rowMajor[1] = "Between Groups"; rowMajor[2] = "Within Groups"; int baseIndex = 0; baseIndex = 1 * columnCount; rowMajor[baseIndex + 0] = "Sum of Squares"; rowMajor[baseIndex + 1] = sB; rowMajor[baseIndex + 2] = sW; baseIndex = 2 * columnCount; rowMajor[baseIndex + 0] = "Degree of Freedom"; rowMajor[baseIndex + 1] = fB; rowMajor[baseIndex + 2] = fW; baseIndex = 3 * columnCount; rowMajor[baseIndex + 0] = "Mean Square Value"; rowMajor[baseIndex + 1] = msB; rowMajor[baseIndex + 2] = msW; m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor); delete[] ni; delete[] sum; delete[] mean; delete[] std; delete[] colNames; delete[] rowMajor; /* was never released */ printLine(1, i18n("F Value is %1", round(m_statisticValue[0])), "green"); printLine(2, i18n("P Value is %1 ", m_pValue[0]), "green"); if (m_pValue[0] <= m_significanceLevel) printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); else printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true")); return; } /*************************************Two Way Anova***************************************/ // all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf //TODO: suppress warning of variable length array are a C99 feature. 
//TODO: add assumptions verification option //TODO: add tail option (if needed) void HypothesisTest::performTwoWayAnova() { int np_a, totalRows_a; int np_b, totalRows_b; countPartitions(m_columns[0], np_a, totalRows_a); countPartitions(m_columns[1], np_b, totalRows_b); double groupMean[np_a][np_b]; int replicates[np_a][np_b]; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) { groupMean[i][j] = 0; replicates[i][j] = 0; } if (totalRows_a != totalRows_b) { - printError("There is missing data in atleast one of the rows"); + printError("There is missing data in at least one of the rows"); return; } QMap catToNumber_a; QMap catToNumber_b; int partitionNumber_a = 1; int partitionNumber_b = 1; for (int i = 0; i < totalRows_a; i++) { QString name_a = m_columns[0]->textAt(i); QString name_b = m_columns[1]->textAt(i); double value = m_columns[2]->valueAt(i); if (catToNumber_a[name_a] == 0) { catToNumber_a[name_a] = partitionNumber_a; partitionNumber_a++; } if (catToNumber_b[name_b] == 0) { catToNumber_b[name_b] = partitionNumber_b; partitionNumber_b++; } groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += value; replicates[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += 1; } int replicate = replicates[0][0]; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) { if (replicates[i][j] == 0) { - printError("Dataset should have atleast one data value corresponding to each feature combination"); + printError("Dataset should have at least one data value corresponding to each feature combination"); return; } if (replicates[i][j] != replicate) { printError("Number of experiments perfomed for each combination of levels
" "between Independet Var.1 and Independent Var.2 must be equal"); return; } groupMean[i][j] /= replicates[i][j]; } double ss_within = 0; for (int i = 0; i < totalRows_a; i++) { QString name_a = m_columns[0]->textAt(i); QString name_b = m_columns[1]->textAt(i); double value = m_columns[2]->valueAt(i); ss_within += gsl_pow_2(value - groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1]); } int df_within = (replicate - 1) * np_a * np_b; double ms_within = ss_within / df_within; double* mean_a = new double[np_a]; double* mean_b = new double[np_b]; for (int i = 0; i < np_a; i++) { for (int j = 0; j < np_b; j++) { mean_a[i] += groupMean[i][j] / np_b; mean_b[j] += groupMean[i][j] / np_a; } } double mean = 0; for (int i = 0; i < np_a; i++) mean += mean_a[i] / np_a; double ss_a = 0; for (int i = 0; i < np_a; i++) ss_a += gsl_pow_2(mean_a[i] - mean); ss_a *= replicate * np_b; int df_a = np_a - 1; double ms_a = ss_a / df_a; double ss_b = 0; for (int i = 0; i < np_b; i++) ss_b += gsl_pow_2(mean_b[i] - mean); ss_b *= replicate * np_a; int df_b = np_b - 1; double ms_b = ss_b / df_b; double ss_interaction = 0; for (int i = 0; i < np_a; i++) for (int j = 0; j < np_b; j++) ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean); ss_interaction *= replicate; int df_interaction = (np_a - 1) * (np_b - 1); double ms_interaction = ss_interaction / df_interaction; QString* partitionNames_a = new QString[np_a]; QString* partitionNames_b = new QString[np_b]; QMapIterator itr_a(catToNumber_a); while (itr_a.hasNext()) { itr_a.next(); partitionNames_a[itr_a.value()-1] = itr_a.key(); } QMapIterator itr_b(catToNumber_b); while (itr_b.hasNext()) { itr_b.next(); partitionNames_b[itr_b.value()-1] = itr_b.key(); } // printing table; // cell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader; QList rowMajor; rowMajor.append(new Cell("", 0, true, "", 2, 1)); for (int i = 0; i < np_b; i++) rowMajor.append(new Cell(partitionNames_b[i], 0, true, 
"", 1, 2)); rowMajor.append(new Cell("Mean", 0, true, "", 2)); for (int i = 0; i < np_b; i++) { rowMajor.append(new Cell("Mean", 1, true)); rowMajor.append(new Cell("Replicate", 1, true)); } int level = 2; for (int i = 0; i < np_a; i++) { rowMajor.append(new Cell(partitionNames_a[i], level, true)); for (int j = 0; j < np_b; j++) { rowMajor.append(new Cell(round(groupMean[i][j]), level)); rowMajor.append(new Cell(replicates[i][j], level)); } rowMajor.append(new Cell(round(mean_a[i]), level)); level++; } rowMajor.append(new Cell("Mean", level, true)); for (int i = 0; i < np_b; i++) rowMajor.append(new Cell(round(mean_b[i]), level, false, "", 1, 2)); rowMajor.append(new Cell(round(mean), level)); m_statsTable = "

" + i18n("Contingency Table") + "

"; m_statsTable += getHtmlTable3(rowMajor); m_statsTable += "
"; m_statsTable += "

" + i18n("results table") + "

"; rowMajor.clear(); level = 0; rowMajor.append(new Cell("", level, true)); rowMajor.append(new Cell("SS", level, true)); rowMajor.append(new Cell("DF", level, true, "degree of freedom")); rowMajor.append(new Cell("MS", level, true)); level++; rowMajor.append(new Cell(m_columns[0]->name(), level, true)); rowMajor.append(new Cell(round(ss_a), level)); rowMajor.append(new Cell(df_a, level)); rowMajor.append(new Cell(round(ms_a), level)); level++; rowMajor.append(new Cell(m_columns[1]->name(), level, true)); rowMajor.append(new Cell(round(ss_b), level)); rowMajor.append(new Cell(df_b, level)); rowMajor.append(new Cell(round(ms_b), level)); level++; rowMajor.append(new Cell("Interaction", level, true)); rowMajor.append(new Cell(round(ss_interaction), level)); rowMajor.append(new Cell(df_interaction, level)); rowMajor.append(new Cell(round(ms_interaction), level)); level++; rowMajor.append(new Cell("Within", level, true)); rowMajor.append(new Cell(round(ss_within), level)); rowMajor.append(new Cell(df_within, level)); rowMajor.append(new Cell(round(ms_within), level)); m_statsTable += getHtmlTable3(rowMajor); double fValue_a = ms_a / ms_within; double fValue_b = ms_b / ms_within; double fValue_interaction = ms_interaction / ms_within; double m_pValue_a = nsl_stats_fdist_p(fValue_a, static_cast(np_a - 1), df_a); double m_pValue_b = nsl_stats_fdist_p(fValue_b, static_cast(np_b - 1), df_b); printLine(0, "F(df" + m_columns[0]->name() + ", dfwithin) is " + round(fValue_a), "blue"); printLine(1, "F(df" + m_columns[1]->name() + ", dfwithin) is " + round(fValue_b), "blue"); printLine(2, "F(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); printLine(4, "P(df" + m_columns[0]->name() + ", dfwithin) is " + round(m_pValue_a), "blue"); printLine(5, "P(df" + m_columns[1]->name() + ", dfwithin) is " + round(m_pValue_b), "blue"); // printLine(2, "P(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue"); m_statisticValue.append(fValue_a); 
m_statisticValue.append(fValue_b); m_statisticValue.append(fValue_interaction); m_pValue.append(m_pValue_a); m_pValue.append(m_pValue_b); delete[] mean_a; delete[] mean_b; delete[] partitionNames_a; delete[] partitionNames_b; return; } /**************************************Levene Test****************************************/ // Some reference to local variables. // np = number of partitions // df = degree of fredom // totalRows = total number of rows in column // these variables are taken from: https://en.wikipedia.org/wiki/Levene%27s_test // yiBar = mean of ith group; // Zij = |Yij - yiBar| // ziBar = mean of Zij for group i // ziBarBar = mean for all zij // ni = number of elements in group i void HypothesisTest::m_performLeveneTest(bool categoricalVariable) { if (m_columns.size() != 2) { printError("Inappropriate number of m_columns selected"); return; } int np = 0; int n = 0; if (!categoricalVariable && isNumericOrInteger(m_columns[0])) np = m_columns.size(); else countPartitions(m_columns[0], np, n); if (np < 2) { - printError("Select atleast two m_columns / classes"); + printError("Select at least two m_columns / classes"); return; } double* yiBar = new double[np]; double* ziBar = new double[np]; double ziBarBar = 0; double* ni = new double[np]; for (int i = 0; i < np; i++) { yiBar[i] = 0; ziBar[i] = 0; ni[i] = 0; } double fValue; int df = 0; int totalRows = 0; QString* colNames = new QString[np]; if (!categoricalVariable && isNumericOrInteger(m_columns[0])) { totalRows = m_columns[0]->rowCount(); double value = 0; for (int j = 0; j < totalRows; j++) { int numberNaNCols = 0; for (int i = 0; i < np; i++) { value = m_columns[i]->valueAt(j); if (std::isnan(value)) { numberNaNCols++; continue; } yiBar[i] += value; ni[i]++; n++; } if (numberNaNCols == np) { totalRows = j; break; } } for (int i = 0; i < np; i++) { if (ni[i] > 0) yiBar[i] = yiBar[i] / ni[i]; else { printError("One of the selected m_columns is empty
" "or have choosen Independent Var.1 wrongly"); return; } } for (int j = 0; j < totalRows; j++) { for (int i = 0; i < np; i++) { value = m_columns[i]->valueAt(j); if (!(std::isnan(value))) ziBar[i] += fabs(value - yiBar[i]); } } for (int i = 0; i < np; i++) { ziBarBar += ziBar[i]; if (ni[i] > 0) ziBar[i] = ziBar[i] / ni[i]; } ziBarBar = ziBarBar / n; double numberatorValue = 0; double denominatorValue = 0; for (int j = 0; j < totalRows; j++) { for (int i = 0; i < np; i++) { value = m_columns[i]->valueAt(j); if (!(std::isnan(value))) { double zij = fabs(value - yiBar[i]); denominatorValue += gsl_pow_2( (zij - ziBar[i])); } } } - if (denominatorValue <= 0) { + if (gsl_fcmp(denominatorValue, 0. ,1.e-16)) { printError( i18n("Denominator value is %1", denominatorValue)); return; } for (int i = 0; i < np; i++) { colNames[i] = m_columns[i]->name(); numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar)); } fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue); } else { QMap classnameToIndex; AbstractColumn::ColumnMode originalColMode = m_columns[0]->columnMode(); m_columns[0]->setColumnMode(AbstractColumn::Text); int partitionNumber = 1; QString name; double value; int classIndex; for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); if (std::isnan(value)) { n = j; break; } if (classnameToIndex[name] == 0) { classnameToIndex[name] = partitionNumber; partitionNumber++; } classIndex = classnameToIndex[name]-1; ni[classIndex]++; yiBar[classIndex] += value; } for (int i = 0; i < np; i++) { if (ni[i] > 0) yiBar[i] = yiBar[i] / ni[i]; else { printError("One of the selected m_columns is empty
" "or have choosen Independent Var.1 wrongly"); m_columns[0]->setColumnMode(originalColMode); return; } } for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); classIndex = classnameToIndex[name] - 1; ziBar[classIndex] += fabs(value - yiBar[classIndex]); } for (int i = 0; i < np; i++) { ziBarBar += ziBar[i]; ziBar[i] = ziBar[i] / ni[i]; } ziBarBar = ziBarBar / n; double numberatorValue = 0; double denominatorValue = 0; for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); classIndex = classnameToIndex[name] - 1; double zij = fabs(value - yiBar[classIndex]); denominatorValue += gsl_pow_2( (zij - ziBar[classIndex])); } for (int i = 0; i < np; i++) numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar)); - if (denominatorValue <= 0) { + if (gsl_fcmp(denominatorValue, 0., 1.e-16)) { printError( "number of data points is less or than equal to number of categorical variables"); m_columns[0]->setColumnMode(originalColMode); return; } fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue); QMapIterator i(classnameToIndex); while (i.hasNext()) { i.next(); colNames[i.value()-1] = m_columns[0]->name() + " " + i.key(); } m_columns[0]->setColumnMode(originalColMode); } df = n - np; // now making the stats table. 
int rowCount = np+1; int columnCount = 4; QVariant* rowMajor = new QVariant[rowCount*columnCount]; // header data; rowMajor[0] = ""; rowMajor[1] = "Ni"; rowMajor[2] = "yiBar"; rowMajor[3] = "ziBar"; // table data for (int row_i = 1; row_i < rowCount; row_i++) { rowMajor[row_i*columnCount] = colNames[row_i-1]; rowMajor[row_i*columnCount + 1] = ni[row_i-1]; rowMajor[row_i*columnCount + 2] = yiBar[row_i-1]; rowMajor[row_i*columnCount + 3] = ziBar[row_i-1]; } m_statsTable = getHtmlTable(rowCount, columnCount, rowMajor); delete[] rowMajor; delete[] yiBar; delete[] ziBar; delete[] ni; m_pValue.append(nsl_stats_fdist_p(fValue, static_cast(np-1), df)); printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue"); printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue"); printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue"); printLine(4, i18n("F Value is %1 ", round(fValue)), "green"); printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); if (m_pValue[0] <= m_significanceLevel) { printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel)); printLine(8, "Requirement for homogeneity is not met", "red"); } else { printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true")); printLine(8, "Requirement for homogeneity is met", "green"); } m_statisticValue.append(fValue); return; } //TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol; // TODO: check for correctness between: for TestZ with TailTwo // m_pValue.append(2*gsl_cdf_tdist_P(value, df) v/s // m_pValue.append(gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df); double HypothesisTest::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) 
{ switch (test) { case HypothesisTest::Test::Type::TTest: { switch (m_tailType) { case HypothesisTest::Test::Tail::Negative: { m_pValue.append(gsl_cdf_tdist_P(value, df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Positive: { value *= -1; m_pValue.append(gsl_cdf_tdist_P(value, df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Two: { m_pValue.append(2.*gsl_cdf_tdist_P(-fabs(value), df)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); break; } } break; } case HypothesisTest::Test::Type::ZTest: { switch (m_tailType) { case HypothesisTest::Test::Tail::Negative: { m_pValue.append(gsl_cdf_gaussian_P(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Positive: { value *= -1; m_pValue.append(nsl_stats_tdist_p(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population 
mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue"); break; } case HypothesisTest::Test::Tail::Two: { m_pValue.append(2.*gsl_cdf_gaussian_P(value - mean, sp)); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue"); break; } } break; } case HypothesisTest::Test::Type::Anova: case HypothesisTest::Test::Type::NoneType: break; } if (m_pValue[0] > 1) return 1; return m_pValue[0]; } // Virtual functions QWidget* HypothesisTest::view() const { if (!m_partView) { m_view = new HypothesisTestView(const_cast(this)); m_partView = m_view; } return m_partView; } diff --git a/src/backend/generalTest/HypothesisTest.h b/src/backend/generalTest/HypothesisTest.h index ef00b8906..2723f899e 100644 --- a/src/backend/generalTest/HypothesisTest.h +++ b/src/backend/generalTest/HypothesisTest.h @@ -1,100 +1,91 @@ /*************************************************************************** File : HypothesisTest.h Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef HYPOTHESISTEST_H #define HYPOTHESISTEST_H -#include "backend/core/AbstractPart.h" #include "GeneralTest.h" -#include "backend/lib/macros.h" - -class HypothesisTestView; -class Spreadsheet; -class QString; -class Column; -class QVBoxLayout; -class QLabel; class HypothesisTest : public GeneralTest { Q_OBJECT public: explicit HypothesisTest(const QString& name); ~HypothesisTest() override; struct Test { enum Type { NoneType = 0, TTest = 1 << 0, ZTest = 1 << 1, Anova = 1 << 2 }; enum SubType { NoneSubType = 0, TwoSampleIndependent = 1 << 0, TwoSamplePaired = 1 << 1, OneSample = 1 << 2, OneWay = 1 << 3, TwoWay = 1 << 4 }; enum Tail {Positive, Negative, Two}; Type type = NoneType; SubType subtype = NoneSubType; Tail tail; }; void setPopulationMean(QVariant populationMean); void setSignificanceLevel(QVariant alpha); void performTest(Test m_test, bool categoricalVariable = true, bool equalVariance = true); void performLeveneTest(bool categoricalVariable); QList& statisticValue(); QList& pValue(); QWidget* view() const override; private: void performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable = false, bool equalVariance = true); void performTwoSamplePairedTest(HypothesisTest::Test::Type test); void performOneSampleTest(HypothesisTest::Test::Type test); void performOneWayAnova(); void performTwoWayAnova(); void m_performLeveneTest(bool categoricalVariable); double getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2name, const double mean, const double sp, const int df); double m_populationMean; double 
m_significanceLevel; HypothesisTest::Test::Tail m_tailType; QList m_pValue; QList m_statisticValue; }; #endif // HypothesisTest_H diff --git a/src/kdefrontend/dockwidgets/CorrelationCoefficientDock.cpp b/src/kdefrontend/dockwidgets/CorrelationCoefficientDock.cpp index 71d5ecad3..7d78d2ed8 100644 --- a/src/kdefrontend/dockwidgets/CorrelationCoefficientDock.cpp +++ b/src/kdefrontend/dockwidgets/CorrelationCoefficientDock.cpp @@ -1,514 +1,513 @@ /*************************************************************************** File : CorrelationCoefficientDock.cpp Project : LabPlot Description : widget for correlation test properties -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "CorrelationCoefficientDock.h" #include "backend/core/AspectTreeModel.h" #include "backend/core/AbstractAspect.h" #include "backend/core/Project.h" #include "backend/spreadsheet/Spreadsheet.h" #include "commonfrontend/widgets/TreeViewComboBox.h" #include "kdefrontend/datasources/DatabaseManagerDialog.h" #include "kdefrontend/datasources/DatabaseManagerWidget.h" #include "kdefrontend/TemplateHandler.h" #include #include #include #include #include #include #include #include /*! \class CorrelationCoefficientDock \brief Provides a dock (widget) for correlation testing: \ingroup kdefrontend */ //TODO: To add tooltips in docks for non obvious widgets. //TODO: Add functionality for database along with spreadsheet. CorrelationCoefficientDock::CorrelationCoefficientDock(QWidget* parent) : QWidget(parent) { ui.setupUi(this); ui.cbDataSourceType->addItem(i18n("Spreadsheet")); ui.cbDataSourceType->addItem(i18n("Database")); cbSpreadsheet = new TreeViewComboBox; ui.gridLayout->addWidget(cbSpreadsheet, 5, 4, 1, 3); ui.bDatabaseManager->setIcon(QIcon::fromTheme("network-server-database")); ui.bDatabaseManager->setToolTip(i18n("Manage connections")); m_configPath = QStandardPaths::standardLocations(QStandardPaths::AppDataLocation).constFirst() + "sql_connections"; ui.cbTest->addItem( i18n("Pearson r"), CorrelationCoefficient::Test::Pearson); ui.cbTest->addItem( i18n("Kendall"), CorrelationCoefficient::Test::Kendall); ui.cbTest->addItem( i18n("Spearman"), CorrelationCoefficient::Test::Spearman); // adding item to tests and testtype combo box; // making all test blocks invisible at starting. 
ui.lCategorical->hide(); ui.chbCategorical->hide(); ui.lCol1->hide(); ui.cbCol1->hide(); ui.lCol2->hide(); ui.cbCol2->hide(); ui.pbPerformTest->setEnabled(false); ui.pbPerformTest->setIcon(QIcon::fromTheme("run-build")); // readConnections(); connect(ui.cbDataSourceType, static_cast(&QComboBox::currentIndexChanged), this, &CorrelationCoefficientDock::dataSourceTypeChanged); connect(cbSpreadsheet, &TreeViewComboBox::currentModelIndexChanged, this, &CorrelationCoefficientDock::spreadsheetChanged); // connect(ui.cbConnection, static_cast(&QComboBox::currentIndexChanged), // this, &CorrelationCoefficientDock::connectionChanged); // connect(ui.cbTable, static_cast(&QComboBox::currentIndexChanged), // this, &CorrelationCoefficientDock::tableChanged); // connect(ui.bDatabaseManager, &QPushButton::clicked, this, &CorrelationCoefficientDock::showDatabaseManager); // connect(ui.bAddRow, &QPushButton::clicked, this, &CorrelationCoefficientDock::addRow); // connect(ui.bRemoveRow, &QPushButton::clicked, this,&CorrelationCoefficientDock::removeRow); // connect(ui.bAddColumn, &QPushButton::clicked, this, &CorrelationCoefficientDock::addColumn); // connect(ui.bRemoveColumn, &QPushButton::clicked, this,&CorrelationCoefficientDock::removeColumn); // connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &CorrelationCoefficientDock::doTTest); // connect(ui.cbCol2, static_cast(&QComboBox::currentIndexChanged), this, &CorrelationCoefficientDock::doTTest); // connect(ui.lwFields, &QListWidget::itemSelectionChanged, this, [=]() { // bool enabled = !ui.lwFields->selectedItems().isEmpty(); // ui.bAddRow->setEnabled(enabled); // ui.bAddColumn->setEnabled(enabled); // }); // connect(ui.lwRows, &QListWidget::doubleClicked, this,&CorrelationCoefficientDock::removeRow); // connect(ui.lwRows, &QListWidget::itemSelectionChanged, this, [=]() { // ui.bRemoveRow->setEnabled(!ui.lwRows->selectedItems().isEmpty()); // }); // connect(ui.lwColumns, &QListWidget::doubleClicked, 
this,&CorrelationCoefficientDock::removeColumn); // connect(ui.lwColumns, &QListWidget::itemSelectionChanged, this, [=]() { // ui.bRemoveColumn->setEnabled(!ui.lwColumns->selectedItems().isEmpty()); // }); connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &CorrelationCoefficientDock::showCorrelationCoefficient); connect(ui.chbCategorical, &QCheckBox::stateChanged, this, &CorrelationCoefficientDock::changeCbCol2Label); connect(ui.pbPerformTest, &QPushButton::clicked, this, &CorrelationCoefficientDock::findCorrelationCoefficient); connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &CorrelationCoefficientDock::col1IndexChanged); ui.cbTest->setCurrentIndex(0); emit ui.cbTest->currentIndexChanged(0); } void CorrelationCoefficientDock::setCorrelationCoefficient(CorrelationCoefficient* CorrelationCoefficient) { m_initializing = true; m_correlationCoefficient = CorrelationCoefficient; m_aspectTreeModel = new AspectTreeModel(m_correlationCoefficient->project()); QList list{AspectType::Folder, AspectType::Workbook, AspectType::Spreadsheet, AspectType::LiveDataSource}; cbSpreadsheet->setTopLevelClasses(list); list = {AspectType::Spreadsheet, AspectType::LiveDataSource}; m_aspectTreeModel->setSelectableAspects(list); cbSpreadsheet->setModel(m_aspectTreeModel); //show the properties ui.leName->setText(m_correlationCoefficient->name()); ui.leComment->setText(m_correlationCoefficient->comment()); ui.cbDataSourceType->setCurrentIndex(m_correlationCoefficient->dataSourceType()); if (m_correlationCoefficient->dataSourceType() == CorrelationCoefficient::DataSourceType::DataSourceSpreadsheet) setModelIndexFromAspect(cbSpreadsheet, m_correlationCoefficient->dataSourceSpreadsheet()); // else // ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(m_correlationCoefficient->dataSourceConnection())); setColumnsComboBoxModel(m_correlationCoefficient->dataSourceSpreadsheet()); this->dataSourceTypeChanged(ui.cbDataSourceType->currentIndex()); 
//setting rows and columns in combo box; //undo functions // connect(m_correlationCoefficient, SIGNAL(aspectDescriptionChanged(const AbstractAspect*)), this, SLOT(CorrelationCoefficientDescriptionChanged(const AbstractAspect*))); m_initializing = false; } void CorrelationCoefficientDock::showCorrelationCoefficient() { if (ui.cbTest->count() == 0) return; m_test = CorrelationCoefficient::Test(ui.cbTest->currentData().toInt()); ui.lCol1->show(); ui.cbCol1->show(); ui.lCol2->show(); ui.cbCol2->show(); ui.lCategorical->setVisible(bool(m_test & CorrelationCoefficient::Test::Pearson)); ui.chbCategorical->setVisible(bool(m_test & CorrelationCoefficient::Test::Pearson)); setColumnsComboBoxView(); ui.pbPerformTest->setEnabled(nonEmptySelectedColumns()); } void CorrelationCoefficientDock::findCorrelationCoefficient() { QVector cols; if (ui.cbCol1->count() == 0) return; cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong()); cols << reinterpret_cast(ui.cbCol2->currentData().toLongLong()); m_correlationCoefficient->setColumns(cols); m_correlationCoefficient->performTest(m_test, ui.chbCategorical->isChecked()); } void CorrelationCoefficientDock::setModelIndexFromAspect(TreeViewComboBox* cb, const AbstractAspect* aspect) { if (aspect) cb->setCurrentModelIndex(m_aspectTreeModel->modelIndexOfAspect(aspect)); else cb->setCurrentModelIndex(QModelIndex()); } ////************************************************************* ////****** SLOTs for changes triggered in CorrelationCoefficientDock ******* ////************************************************************* //void CorrelationCoefficientDock::nameChanged() { // if (m_initializing) // return; // m_correlationCoefficient->setName(ui.leName->text()); //} //void CorrelationCoefficientDock::commentChanged() { // if (m_initializing) // return; // m_correlationCoefficient->setComment(ui.leComment->text()); //} void CorrelationCoefficientDock::dataSourceTypeChanged(int index) { //QDEBUG("in dataSourceTypeChanged"); 
CorrelationCoefficient::DataSourceType type = static_cast(index); bool showDatabase = (type == CorrelationCoefficient::DataSourceType::DataSourceDatabase); ui.lSpreadsheet->setVisible(!showDatabase); cbSpreadsheet->setVisible(!showDatabase); ui.lConnection->setVisible(showDatabase); ui.cbConnection->setVisible(showDatabase); ui.bDatabaseManager->setVisible(showDatabase); ui.lTable->setVisible(showDatabase); ui.cbTable->setVisible(showDatabase); if (m_initializing) return; m_correlationCoefficient->setComment(ui.leComment->text()); } void CorrelationCoefficientDock::spreadsheetChanged(const QModelIndex& index) { //QDEBUG("in spreadsheetChanged"); auto* aspect = static_cast(index.internalPointer()); Spreadsheet* spreadsheet = dynamic_cast(aspect); setColumnsComboBoxModel(spreadsheet); m_correlationCoefficient->setDataSourceSpreadsheet(spreadsheet); } void CorrelationCoefficientDock::col1IndexChanged(int index) { if (index < 0) return; changeCbCol2Label(); } //void CorrelationCoefficientDock::connectionChanged() { // if (ui.cbConnection->currentIndex() == -1) { // ui.lTable->hide(); // ui.cbTable->hide(); // return; // } // //clear the previously shown tables // ui.cbTable->clear(); // ui.lTable->show(); // ui.cbTable->show(); // const QString& connection = ui.cbConnection->currentText(); // //connection name was changed, determine the current connections settings // KConfig config(m_configPath, KConfig::SimpleConfig); // KConfigGroup group = config.group(connection); // //close and remove the previos connection, if available // if (m_db.isOpen()) { // m_db.close(); // QSqlDatabase::removeDatabase(m_db.driverName()); // } // //open the selected connection // //QDEBUG("CorrelationCoefficientDock: connecting to " + connection); // const QString& driver = group.readEntry("Driver"); // m_db = QSqlDatabase::addDatabase(driver); // const QString& dbName = group.readEntry("DatabaseName"); // if (DatabaseManagerWidget::isFileDB(driver)) { // if (!QFile::exists(dbName)) { // 
KMessageBox::error(this, i18n("Couldn't find the database file '%1'. Please check the connection settings.", dbName), // appendRow i18n("Connection Failed")); // return; // } else // m_db.setDatabaseName(dbName); // } else if (DatabaseManagerWidget::isODBC(driver)) { // if (group.readEntry("CustomConnectionEnabled", false)) // m_db.setDatabaseName(group.readEntry("CustomConnectionString")); // else // m_db.setDatabaseName(dbName); // } else { // m_db.setDatabaseName(dbName); // m_db.setHostName( group.readEntry("HostName") ); // m_db.setPort( group.readEntry("Port", 0) ); // m_db.setUserName( group.readEntry("UserName") ); // m_db.setPassword( group.readEntry("Password") ); // } // WAIT_CURSOR; // if (!m_db.open()) { // RESET_CURSOR; // KMessageBox::error(this, i18n("Failed to connect to the database '%1'. Please check the connection settings.", ui.cbConnection->currentText()) + // QLatin1String("\n\n") + m_db.lastError().databaseText(), // i18n("Connection Failed")); // return; // } // //show all available database tables // if (m_db.tables().size()) { // for (auto table : m_db.tables()) // ui.cbTable->addItem(QIcon::fromTheme("view-form-table"), table); // ui.cbTable->setCurrentIndex(0); // } // RESET_CURSOR; // if (m_initializing) // return; //// m_correlationCoefficient->setDataSourceConnection(connection); //} //void CorrelationCoefficientDock::tableChanged() { // const QString& table = ui.cbTable->currentText(); // //show all attributes of the selected table //// for (const auto* col : spreadsheet->children()) { //// QListWidgetItem* item = new QListWidgetItem(col->icon(), col->name()); //// ui.lwFields->addItem(item); //// } // if (m_initializing) // return; //// m_correlationCoefficient->setDataSourceTable(table); //} ////************************************************************* ////******** SLOTs for changes triggered in Spreadsheet ********* ////************************************************************* void 
CorrelationCoefficientDock::CorrelationCoefficientDescriptionChanged(const AbstractAspect* aspect) { if (m_correlationCoefficient != aspect) return; m_initializing = true; if (aspect->name() != ui.leName->text()) ui.leName->setText(aspect->name()); else if (aspect->comment() != ui.leComment->text()) ui.leComment->setText(aspect->comment()); m_initializing = false; } void CorrelationCoefficientDock::changeCbCol2Label() { if (ui.cbCol1->count() == 0) return; QString selected_text = ui.cbCol1->currentText(); Column* col1 = m_correlationCoefficient->dataSourceSpreadsheet()->column(selected_text); if (bool(m_test & (CorrelationCoefficient::Test::Kendall | CorrelationCoefficient::Test::Spearman)) || (!ui.chbCategorical->isChecked() && (col1->columnMode() == AbstractColumn::Integer || col1->columnMode() == AbstractColumn::Numeric))) { ui.lCol2->setText( i18n("Independent Var. 2")); ui.chbCategorical->setChecked(false); ui.chbCategorical->setEnabled(true); } else { ui.lCol2->setText( i18n("Dependent Var. 1")); if (!ui.chbCategorical->isChecked()) ui.chbCategorical->setEnabled(false); else ui.chbCategorical->setEnabled(true); ui.chbCategorical->setChecked(true); } } ////************************************************************* ////******************** SETTINGS ******************************* ////************************************************************* //void CorrelationCoefficientDock::load() { //} //void CorrelationCoefficientDock::loadConfigFromTemplate(KConfig& config) { // Q_UNUSED(config); //} ///*! // loads saved matrix properties from \c config. // */ //void CorrelationCoefficientDock::loadConfig(KConfig& config) { // Q_UNUSED(config); //} ///*! // saves matrix properties to \c config. 
// */ //void CorrelationCoefficientDock::saveConfigAsTemplate(KConfig& config) { // Q_UNUSED(config); //} void CorrelationCoefficientDock::setColumnsComboBoxModel(Spreadsheet* spreadsheet) { m_onlyValuesCols.clear(); m_twoCategoricalCols.clear(); m_multiCategoricalCols.clear(); for (auto* col : spreadsheet->children()) { if (col->columnMode() == AbstractColumn::Integer || col->columnMode() == AbstractColumn::Numeric) m_onlyValuesCols.append(col); else { int np = 0, n_rows = 0; countPartitions(col, np, n_rows); if (np <= 1) continue; else if (np == 2) m_twoCategoricalCols.append(col); else m_multiCategoricalCols.append(col); } } setColumnsComboBoxView(); showCorrelationCoefficient(); } //TODO: change from if else to switch case: void CorrelationCoefficientDock::setColumnsComboBoxView() { ui.cbCol1->clear(); ui.cbCol2->clear(); QList::iterator i; switch (m_test) { case (CorrelationCoefficient::Test::Pearson): { for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { ui.cbCol1->addItem( (*i)->name(), qint64(*i)); ui.cbCol2->addItem( (*i)->name(), qint64(*i)); } for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) ui.cbCol1->addItem( (*i)->name(), qint64(*i)); break; } case CorrelationCoefficient::Test::Kendall: { for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { ui.cbCol1->addItem( (*i)->name(), qint64(*i)); ui.cbCol2->addItem( (*i)->name(), qint64(*i)); } for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) { ui.cbCol1->addItem( (*i)->name(), qint64(*i)); ui.cbCol2->addItem( (*i)->name(), qint64(*i)); } for (i = m_multiCategoricalCols.begin(); i != m_multiCategoricalCols.end(); i++) { ui.cbCol1->addItem( (*i)->name(), qint64(*i)); ui.cbCol2->addItem( (*i)->name(), qint64(*i)); } break; } case CorrelationCoefficient::Test::Spearman: { for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { ui.cbCol1->addItem( (*i)->name(), qint64(*i)); ui.cbCol2->addItem( (*i)->name(), 
qint64(*i)); } for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) ui.cbCol1->addItem( (*i)->name(), qint64(*i)); break; } } } bool CorrelationCoefficientDock::nonEmptySelectedColumns() { - if (ui.cbCol1->isVisible() && ui.cbCol1->count() < 1) - return false; - if (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1) + if ((ui.cbCol1->isVisible() && ui.cbCol1->count() < 1) || + (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1)) return false; return true; } void CorrelationCoefficientDock::countPartitions(Column *column, int &np, int &total_rows) { total_rows = column->rowCount(); np = 0; QString cell_value; QMap discovered_categorical_var; AbstractColumn::ColumnMode original_col_mode = column->columnMode(); column->setColumnMode(AbstractColumn::Text); for (int i = 0; i < total_rows; i++) { cell_value = column->textAt(i); if (cell_value.isEmpty()) { total_rows = i; break; } if (discovered_categorical_var[cell_value]) continue; discovered_categorical_var[cell_value] = true; np++; } column->setColumnMode(original_col_mode); } diff --git a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp b/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp index 2804cbbc9..ceae0f807 100644 --- a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp +++ b/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp @@ -1,877 +1,875 @@ /*************************************************************************** File : HypothesisTestDock.cpp Project : LabPlot Description : widget for hypothesis test properties -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; 
either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "HypothesisTestDock.h" #include "backend/core/AspectTreeModel.h" #include "backend/core/AbstractAspect.h" #include "backend/core/Project.h" #include "backend/spreadsheet/Spreadsheet.h" #include "commonfrontend/widgets/TreeViewComboBox.h" #include "kdefrontend/datasources/DatabaseManagerDialog.h" #include "kdefrontend/datasources/DatabaseManagerWidget.h" #include "kdefrontend/TemplateHandler.h" #include #include #include #include #include #include #include #include /*! \class HypothesisTestDock \brief Provides a dock (widget) for hypothesis testing: \ingroup kdefrontend */ //TODO: To add tooltips in docks for non obvious widgets. //TODO: Add functionality for database along with spreadsheet. 
HypothesisTestDock::HypothesisTestDock(QWidget* parent) : QWidget(parent) { //QDEBUG("in hypothesis test constructor "); ui.setupUi(this); ui.cbDataSourceType->addItem(i18n("Spreadsheet")); ui.cbDataSourceType->addItem(i18n("Database")); cbSpreadsheet = new TreeViewComboBox; ui.gridLayout->addWidget(cbSpreadsheet, 5, 4, 1, 3); ui.bDatabaseManager->setIcon(QIcon::fromTheme("network-server-database")); ui.bDatabaseManager->setToolTip(i18n("Manage connections")); m_configPath = QStandardPaths::standardLocations(QStandardPaths::AppDataLocation).constFirst() + "sql_connections"; // adding item to tests and testtype combo box; ui.cbTest->addItem( i18n("T Test"), HypothesisTest::Test::Type::TTest); ui.cbTest->addItem( i18n("Z Test"), HypothesisTest::Test::Type::ZTest); ui.cbTest->addItem( i18n("ANOVA"), HypothesisTest::Test::Type::Anova); ui.lPopulationSigma->setText( UTF8_QSTRING("σ")); // making all test blocks invisible at starting. ui.pbLeveneTest->hide(); ui.lCategorical->hide(); ui.chbCategorical->hide(); ui.lCol1->hide(); ui.cbCol1->hide(); ui.lCol2->hide(); ui.cbCol2->hide(); ui.lCol3->hide(); ui.cbCol3->hide(); ui.lEqualVariance->hide(); ui.chbEqualVariance->hide(); ui.chbEqualVariance->setChecked(true); ui.lPopulationSigma->hide(); ui.lPopulationSigma->setToolTip( i18n("Sigma of Population

" "Hint: Z-Test if preffered over T-Test if this is known")); ui.chbPopulationSigma->hide(); ui.lePopulationSigma->hide(); ui.pbPerformTest->setEnabled(false); ui.rbH1OneTail2->hide(); ui.rbH1OneTail1->hide(); ui.rbH1TwoTail->hide(); ui.rbH0OneTail1->hide(); ui.rbH0OneTail2->hide(); ui.rbH0TwoTail->hide(); ui.lH0->hide(); ui.lH1->hide(); QString mu = UTF8_QSTRING("μ"); QString mu0 = UTF8_QSTRING("μₒ"); // radio button for null and alternate hypothesis // for alternative hypothesis (h1) // one_tail_1 is mu > mu0; one_tail_2 is mu < mu0; two_tail = mu != mu0; ui.rbH1OneTail1->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING(">"), mu0)); ui.rbH1OneTail2->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("<"), mu0)); ui.rbH1TwoTail->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("≠"), mu0)); ui.rbH0OneTail1->setText( i18n("%1 %2 %3",mu, UTF8_QSTRING("≤"), mu0)); ui.rbH0OneTail2->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("≥"), mu0)); ui.rbH0TwoTail->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("="), mu0)); ui.rbH0TwoTail->setEnabled(false); ui.rbH0OneTail1->setEnabled(false); ui.rbH0OneTail2->setEnabled(false); // setting muo and alpha buttons ui.lMuo->setText( i18n("%1", mu0)); ui.lAlpha->setText( i18n("%1", UTF8_QSTRING("α"))); ui.leMuo->setText( i18n("%1", m_populationMean)); ui.leAlpha->setText( i18n("%1", m_significanceLevel)); ui.lMuo->hide(); ui.lMuo->setToolTip( i18n("Population Mean")); ui.lAlpha->hide(); ui.lAlpha->setToolTip( i18n("Significance Level")); ui.leMuo->hide(); ui.leAlpha->hide(); ui.pbPerformTest->setIcon(QIcon::fromTheme("run-build")); ui.leMuo->setText( i18n("%1", m_populationMean)); ui.leAlpha->setText( i18n("%1", m_significanceLevel)); // readConnections(); // auto* style = ui.bAddRow->style(); // ui.bAddRow->setIcon(style->standardIcon(QStyle::SP_ArrowRight)); // ui.bAddRow->setToolTip(i18n("Add the selected field to rows")); // ui.bRemoveRow->setIcon(style->standardIcon(QStyle::SP_ArrowLeft)); // ui.bRemoveRow->setToolTip(i18n("Remove the selected field from 
rows")); // ui.bAddColumn->setIcon(style->standardIcon(QStyle::SP_ArrowRight)); // ui.bAddColumn->setToolTip(i18n("Add the selected field to columns")); // ui.bRemoveColumn->setIcon(style->standardIcon(QStyle::SP_ArrowLeft)); // ui.bRemoveColumn->setToolTip(i18n("Remove the selected field from columns")); // //add/remove buttons only enabled if something was selected // ui.bAddRow->setEnabled(false); // ui.bRemoveRow->setEnabled(false); // ui.bAddColumn->setEnabled(false); // ui.bRemoveColumn->setEnabled(false); // connect(ui.leName, &QLineEdit::textChanged, this, &HypothesisTestDock::nameChanged); // connect(ui.leComment, &QLineEdit::textChanged, this, &HypothesisTestDock::commentChanged); connect(ui.cbDataSourceType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::dataSourceTypeChanged); connect(cbSpreadsheet, &TreeViewComboBox::currentModelIndexChanged, this, &HypothesisTestDock::spreadsheetChanged); // connect(ui.cbConnection, static_cast(&QComboBox::currentIndexChanged), // this, &HypothesisTestDock::connectionChanged); // connect(ui.cbTable, static_cast(&QComboBox::currentIndexChanged), // this, &HypothesisTestDock::tableChanged); // connect(ui.bDatabaseManager, &QPushButton::clicked, this, &HypothesisTestDock::showDatabaseManager); // connect(ui.bAddRow, &QPushButton::clicked, this, &HypothesisTestDock::addRow); // connect(ui.bRemoveRow, &QPushButton::clicked, this,&HypothesisTestDock::removeRow); // connect(ui.bAddColumn, &QPushButton::clicked, this, &HypothesisTestDock::addColumn); // connect(ui.bRemoveColumn, &QPushButton::clicked, this,&HypothesisTestDock::removeColumn); // connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::doTTest); // connect(ui.cbCol2, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::doTTest); // connect(ui.lwFields, &QListWidget::itemSelectionChanged, this, [=]() { // bool enabled = !ui.lwFields->selectedItems().isEmpty(); // 
ui.bAddRow->setEnabled(enabled); // ui.bAddColumn->setEnabled(enabled); // }); // connect(ui.lwRows, &QListWidget::doubleClicked, this,&HypothesisTestDock::removeRow); // connect(ui.lwRows, &QListWidget::itemSelectionChanged, this, [=]() { // ui.bRemoveRow->setEnabled(!ui.lwRows->selectedItems().isEmpty()); // }); // connect(ui.lwColumns, &QListWidget::doubleClicked, this,&HypothesisTestDock::removeColumn); // connect(ui.lwColumns, &QListWidget::itemSelectionChanged, this, [=]() { // ui.bRemoveColumn->setEnabled(!ui.lwColumns->selectedItems().isEmpty()); // }); connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showTestType); connect(ui.cbTestType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); // connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); // connect(ui.cbTestType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); connect(ui.pbPerformTest, &QPushButton::clicked, this, &HypothesisTestDock::doHypothesisTest); connect(ui.pbLeveneTest, &QPushButton::clicked, this, &HypothesisTestDock::performLeveneTest); //connecting null hypothesis and alternate hypothesis radio button connect(ui.rbH1OneTail1, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1OneTail1Toggled); connect(ui.rbH1OneTail2, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1OneTail2Toggled); connect(ui.rbH1TwoTail, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1TwoTailToggled); connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::col1IndexChanged); connect(ui.chbCategorical, &QCheckBox::stateChanged, this, &HypothesisTestDock::changeCbCol2Label); connect(ui.chbPopulationSigma, &QCheckBox::stateChanged, this, &HypothesisTestDock::chbPopulationSigmaStateChanged); ui.cbTest->setCurrentIndex(0); emit ui.cbTest->currentIndexChanged(0); 
ui.cbTestType->setCurrentIndex(0); emit ui.cbTestType->currentIndexChanged(0); } void HypothesisTestDock::setHypothesisTest(HypothesisTest* HypothesisTest) { //QDEBUG("in set hypothesis test"); m_initializing = true; m_hypothesisTest = HypothesisTest; m_aspectTreeModel = new AspectTreeModel(m_hypothesisTest->project()); QList list{AspectType::Folder, AspectType::Workbook, AspectType::Spreadsheet, AspectType::LiveDataSource}; cbSpreadsheet->setTopLevelClasses(list); list = {AspectType::Spreadsheet, AspectType::LiveDataSource}; m_aspectTreeModel->setSelectableAspects(list); cbSpreadsheet->setModel(m_aspectTreeModel); //show the properties ui.leName->setText(m_hypothesisTest->name()); ui.leComment->setText(m_hypothesisTest->comment()); ui.cbDataSourceType->setCurrentIndex(m_hypothesisTest->dataSourceType()); if (m_hypothesisTest->dataSourceType() == HypothesisTest::DataSourceType::DataSourceSpreadsheet) setModelIndexFromAspect(cbSpreadsheet, m_hypothesisTest->dataSourceSpreadsheet()); // else // ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(m_hypothesisTest->dataSourceConnection())); setColumnsComboBoxModel(m_hypothesisTest->dataSourceSpreadsheet()); this->dataSourceTypeChanged(ui.cbDataSourceType->currentIndex()); //setting rows and columns in combo box; //undo functions // connect(m_hypothesisTest, SIGNAL(aspectDescriptionChanged(const AbstractAspect*)), this, SLOT(hypothesisTestDescriptionChanged(const AbstractAspect*))); m_initializing = false; } void HypothesisTestDock::showTestType() { //QDEBUG("in show test type"); m_test.type = HypothesisTest::Test::Type(ui.cbTest->currentData().toInt()); ui.cbTestType->clear(); if (m_test.type & (HypothesisTest::Test::Type::TTest | HypothesisTest::Test::Type::ZTest)) { ui.cbTestType->addItem( i18n("Two Sample Independent"), HypothesisTest::Test::SubType::TwoSampleIndependent); ui.cbTestType->addItem( i18n("Two Sample Paired"), HypothesisTest::Test::SubType::TwoSamplePaired); ui.cbTestType->addItem( i18n("One 
Sample"), HypothesisTest::Test::SubType::OneSample); } else if (m_test.type & HypothesisTest::Test::Type::Anova) { ui.cbTestType->addItem( i18n("One Way"), HypothesisTest::Test::SubType::OneWay); ui.cbTestType->addItem( i18n("Two Way"), HypothesisTest::Test::SubType::TwoWay); } } void HypothesisTestDock::showHypothesisTest() { //QDEBUG("in showHypothesisTest"); if (ui.cbTestType->count() == 0) return; m_test.subtype = HypothesisTest::Test::SubType(ui.cbTestType->currentData().toInt()); ui.lCol1->show(); ui.cbCol1->show(); ui.lCol2->setVisible(bool(m_test.subtype & (~HypothesisTest::Test::SubType::OneSample))); ui.cbCol2->setVisible(bool(m_test.subtype & (~HypothesisTest::Test::SubType::OneSample))); ui.lCol3->setVisible(bool(m_test.type & (HypothesisTest::Test::Anova) & setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoWay))); ui.cbCol3->setVisible(bool(m_test.type & (HypothesisTest::Test::Anova) & setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoWay))); ui.lEqualVariance->setVisible(bool( (m_test.type & HypothesisTest::Test::Type::TTest) & (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); ui.chbEqualVariance->setVisible(bool( (m_test.type & HypothesisTest::Test::Type::TTest) & (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); ui.lCategorical->setVisible(bool((m_test.type & HypothesisTest::Test::Type::TTest) & (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); ui.chbCategorical->setVisible(bool((m_test.type & HypothesisTest::Test::Type::TTest) & (m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent))); ui.chbEqualVariance->setChecked(true); ui.lPopulationSigma->setVisible(bool((m_test.type & (HypothesisTest::Test::Type::TTest | HypothesisTest::Test::Type::ZTest)) & ~(setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneSample)))); ui.chbPopulationSigma->setVisible(bool((m_test.type & (HypothesisTest::Test::Type::TTest | 
HypothesisTest::Test::Type::ZTest)) & ~(setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneSample)))); ui.chbPopulationSigma->setChecked(false); ui.pbLeveneTest->setVisible(bool((m_test.type & HypothesisTest::Test::Type::Anova & setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneWay)) | (HypothesisTest::Test::Type::TTest & setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent)))); ui.lH1->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); ui.rbH1OneTail1->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); ui.rbH1OneTail2->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); ui.rbH1TwoTail->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); ui.lH0->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); ui.rbH0OneTail1->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); ui.rbH0OneTail2->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); ui.rbH0TwoTail->setVisible(bool(m_test.type & ~HypothesisTest::Test::Type::Anova)); ui.rbH1TwoTail->setChecked(true); ui.lMuo->setVisible(bool(m_test.subtype & HypothesisTest::Test::SubType::OneSample)); ui.leMuo->setVisible(bool(ui.lMuo->isVisible())); ui.lAlpha->show(); ui.leAlpha->show(); setColumnsComboBoxView(); ui.pbPerformTest->setEnabled(nonEmptySelectedColumns()); ui.pbLeveneTest->setEnabled(nonEmptySelectedColumns()); } void HypothesisTestDock::doHypothesisTest() { //QDEBUG("in doHypothesisTest"); m_hypothesisTest->setPopulationMean(ui.leMuo->text()); m_hypothesisTest->setSignificanceLevel(ui.leAlpha->text()); QVector cols; if (ui.cbCol1->count() == 0) return; cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong()); if (m_test.subtype & HypothesisTest::Test::SubType::TwoWay) cols << reinterpret_cast(ui.cbCol3->currentData().toLongLong()); if (m_test.subtype & (~HypothesisTest::Test::SubType::OneSample)) if (ui.cbCol2->count() > 0) cols << 
reinterpret_cast(ui.cbCol2->currentData().toLongLong()); m_hypothesisTest->setColumns(cols); m_hypothesisTest->performTest(m_test, ui.chbCategorical->isChecked(), ui.chbEqualVariance->isChecked()); } void HypothesisTestDock::performLeveneTest() { QVector cols; if (ui.cbCol1->count() == 0 || ui.cbCol2->count() == 0) return; cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong()); cols << reinterpret_cast(ui.cbCol2->currentData().toLongLong()); m_hypothesisTest->setColumns(cols); m_hypothesisTest->setSignificanceLevel(ui.leAlpha->text()); m_hypothesisTest->performLeveneTest(ui.chbCategorical->isChecked()); } void HypothesisTestDock::setModelIndexFromAspect(TreeViewComboBox* cb, const AbstractAspect* aspect) { if (aspect) cb->setCurrentModelIndex(m_aspectTreeModel->modelIndexOfAspect(aspect)); else cb->setCurrentModelIndex(QModelIndex()); } ///*! // shows the database manager where the connections are created and edited. // The selected connection is selected in the connection combo box in this widget. //**/ //void HypothesisTestDock::showDatabaseManager() { // DatabaseManagerDialog* dlg = new DatabaseManagerDialog(this, ui.cbConnection->currentText()); // if (dlg->exec() == QDialog::Accepted) { // //re-read the available connections to be in sync with the changes in DatabaseManager // m_initializing = true; // ui.cbConnection->clear(); // readConnections(); // //select the connection the user has selected in DatabaseManager // const QString& conn = dlg->connection(); // ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(conn)); // m_initializing = false; // connectionChanged(); // } // delete dlg; //} ///*! // loads all available saved connections //*/ //void HypothesisTestDock::readConnections() { // DEBUG("ImportSQLDatabaseWidget: reading available connections"); // KConfig config(m_configPath, KConfig::SimpleConfig); // for (const auto& name : config.groupList()) // ui.cbConnection->addItem(name); //} ///*! 
// * adds the selected field to the rows // */ //void HypothesisTestDock::addRow() { // QString field = ui.lwFields->currentItem()->text(); // ui.lwRows->addItem(field); // ui.lwFields->takeItem(ui.lwFields->currentRow()); // m_hypothesisTest->addToRows(field); //} ///*! // * removes the selected field from the rows // */ //void HypothesisTestDock::removeRow() { // const QString& field = ui.lwRows->currentItem()->text(); // ui.lwRows->takeItem(ui.lwRows->currentRow()); // m_hypothesisTest->removeFromRows(field); // updateFields(); //} ///*! // * adds the selected field to the columns // */ //void HypothesisTestDock::addColumn() { // QString field = ui.lwFields->currentItem()->text(); // ui.lwColumns->addItem(field); // ui.lwFields->takeItem(ui.lwFields->currentRow()); // m_hypothesisTest->addToColumns(field); //} ///*! // * removes the selected field from the columns // */ //void HypothesisTestDock::removeColumn() { // const QString& field = ui.lwColumns->currentItem()->text(); // ui.lwColumns->takeItem(ui.lwColumns->currentRow()); // m_hypothesisTest->removeFromColumns(field); // updateFields(); //} ///*! // * re-populates the content of the "Fields" list widget by adding the non-selected fields only. // * called when a selected field is removed from rows or columns. // */ //void HypothesisTestDock::updateFields() { // ui.lwFields->clear(); // for (auto dimension : m_hypothesisTest->dimensions()) // if (!fieldSelected(dimension)) // ui.lwFields->addItem(new QListWidgetItem(QIcon::fromTheme("draw-text"), dimension)); // for (auto measure : m_hypothesisTest->measures()) // if (!fieldSelected(measure)) // ui.lwFields->addItem(new QListWidgetItem(measure)); //} ///*! // * return \c true if the field name \c field was selected among rows or columns, // * return \c false otherwise. 
// * */ //bool HypothesisTestDock::fieldSelected(const QString& field) { // for (int i = 0; icount(); ++i) // if (ui.lwRows->item(i)->text() == field) // return true; // for (int i = 0; icount(); ++i) // if (ui.lwColumns->item(i)->text() == field) // return true; // return false; //} ////************************************************************* ////****** SLOTs for changes triggered in HypothesisTestDock ******* ////************************************************************* //void HypothesisTestDock::nameChanged() { // if (m_initializing) // return; // m_hypothesisTest->setName(ui.leName->text()); //} //void HypothesisTestDock::commentChanged() { // if (m_initializing) // return; // m_hypothesisTest->setComment(ui.leComment->text()); //} void HypothesisTestDock::dataSourceTypeChanged(int index) { //QDEBUG("in dataSourceTypeChanged"); HypothesisTest::DataSourceType type = static_cast(index); bool showDatabase = (type == HypothesisTest::DataSourceType::DataSourceDatabase); ui.lSpreadsheet->setVisible(!showDatabase); cbSpreadsheet->setVisible(!showDatabase); ui.lConnection->setVisible(showDatabase); ui.cbConnection->setVisible(showDatabase); ui.bDatabaseManager->setVisible(showDatabase); ui.lTable->setVisible(showDatabase); ui.cbTable->setVisible(showDatabase); if (m_initializing) return; m_hypothesisTest->setComment(ui.leComment->text()); } void HypothesisTestDock::spreadsheetChanged(const QModelIndex& index) { //QDEBUG("in spreadsheetChanged"); auto* aspect = static_cast(index.internalPointer()); Spreadsheet* spreadsheet = dynamic_cast(aspect); setColumnsComboBoxModel(spreadsheet); m_hypothesisTest->setDataSourceSpreadsheet(spreadsheet); } void HypothesisTestDock::changeCbCol2Label() { //QDEBUG("in changeCbCol2Label"); if ( (m_test.type & ~HypothesisTest::Test::Type::Anova) & (m_test.subtype & ~HypothesisTest::Test::SubType::TwoSampleIndependent)) { ui.lCol2->setText( i18n("Independent Var. 
2")); return; } if (ui.cbCol1->count() == 0) return; QString selected_text = ui.cbCol1->currentText(); Column* col1 = m_hypothesisTest->dataSourceSpreadsheet()->column(selected_text); if (!ui.chbCategorical->isChecked() && (col1->columnMode() == AbstractColumn::Integer || col1->columnMode() == AbstractColumn::Numeric)) { ui.lCol2->setText( i18n("Independent Var. 2")); ui.chbCategorical->setChecked(false); ui.chbCategorical->setEnabled(true); } else { ui.lCol2->setText( i18n("Dependent Var. 1")); if (!ui.chbCategorical->isChecked()) ui.chbCategorical->setEnabled(false); else ui.chbCategorical->setEnabled(true); ui.chbCategorical->setChecked(true); } } void HypothesisTestDock::chbPopulationSigmaStateChanged() { if (ui.chbPopulationSigma->isVisible() && ui.chbPopulationSigma->isChecked()) ui.lePopulationSigma->show(); else ui.lePopulationSigma->hide(); } void HypothesisTestDock::col1IndexChanged(int index) { if (index < 0) return; changeCbCol2Label(); } //void HypothesisTestDock::connectionChanged() { // if (ui.cbConnection->currentIndex() == -1) { // ui.lTable->hide(); // ui.cbTable->hide(); // return; // } // //clear the previously shown tables // ui.cbTable->clear(); // ui.lTable->show(); // ui.cbTable->show(); // const QString& connection = ui.cbConnection->currentText(); // //connection name was changed, determine the current connections settings // KConfig config(m_configPath, KConfig::SimpleConfig); // KConfigGroup group = config.group(connection); // //close and remove the previos connection, if available // if (m_db.isOpen()) { // m_db.close(); // QSqlDatabase::removeDatabase(m_db.driverName()); // } // //open the selected connection // //QDEBUG("HypothesisTestDock: connecting to " + connection); // const QString& driver = group.readEntry("Driver"); // m_db = QSqlDatabase::addDatabase(driver); // const QString& dbName = group.readEntry("DatabaseName"); // if (DatabaseManagerWidget::isFileDB(driver)) { // if (!QFile::exists(dbName)) { // 
KMessageBox::error(this, i18n("Couldn't find the database file '%1'. Please check the connection settings.", dbName), // appendRow i18n("Connection Failed")); // return; // } else // m_db.setDatabaseName(dbName); // } else if (DatabaseManagerWidget::isODBC(driver)) { // if (group.readEntry("CustomConnectionEnabled", false)) // m_db.setDatabaseName(group.readEntry("CustomConnectionString")); // else // m_db.setDatabaseName(dbName); // } else { // m_db.setDatabaseName(dbName); // m_db.setHostName( group.readEntry("HostName") ); // m_db.setPort( group.readEntry("Port", 0) ); // m_db.setUserName( group.readEntry("UserName") ); // m_db.setPassword( group.readEntry("Password") ); // } // WAIT_CURSOR; // if (!m_db.open()) { // RESET_CURSOR; // KMessageBox::error(this, i18n("Failed to connect to the database '%1'. Please check the connection settings.", ui.cbConnection->currentText()) + // QLatin1String("\n\n") + m_db.lastError().databaseText(), // i18n("Connection Failed")); // return; // } // //show all available database tables // if (m_db.tables().size()) { // for (auto table : m_db.tables()) // ui.cbTable->addItem(QIcon::fromTheme("view-form-table"), table); // ui.cbTable->setCurrentIndex(0); // } // RESET_CURSOR; // if (m_initializing) // return; //// m_hypothesisTest->setDataSourceConnection(connection); //} //void HypothesisTestDock::tableChanged() { // const QString& table = ui.cbTable->currentText(); // //show all attributes of the selected table //// for (const auto* col : spreadsheet->children()) { //// QListWidgetItem* item = new QListWidgetItem(col->icon(), col->name()); //// ui.lwFields->addItem(item); //// } // if (m_initializing) // return; //// m_hypothesisTest->setDataSourceTable(table); //} ////************************************************************* ////******** SLOTs for changes triggered in Spreadsheet ********* ////************************************************************* void HypothesisTestDock::hypothesisTestDescriptionChanged(const 
AbstractAspect* aspect) { //QDEBUG("in hypothesisTestDescriptionChanged"); if (m_hypothesisTest != aspect) return; m_initializing = true; if (aspect->name() != ui.leName->text()) ui.leName->setText(aspect->name()); else if (aspect->comment() != ui.leComment->text()) ui.leComment->setText(aspect->comment()); m_initializing = false; } ////************************************************************* ////******************** SETTINGS ******************************* ////************************************************************* //void HypothesisTestDock::load() { //} //void HypothesisTestDock::loadConfigFromTemplate(KConfig& config) { // Q_UNUSED(config); //} ///*! // loads saved matrix properties from \c config. // */ //void HypothesisTestDock::loadConfig(KConfig& config) { // Q_UNUSED(config); //} ///*! // saves matrix properties to \c config. // */ //void HypothesisTestDock::saveConfigAsTemplate(KConfig& config) { // Q_UNUSED(config); //} //TODO: Rather than inbuilt slots use own decided slots for checked rather than clicked // for alternate hypothesis // one_tail_1 is mu > mu0; one_tail_2 is mu < mu0; two_tail = mu != mu0; void HypothesisTestDock::onRbH1OneTail1Toggled(bool checked) { if (!checked) return; ui.rbH0OneTail1->setChecked(true); m_test.tail = HypothesisTest::Test::Tail::Positive; } void HypothesisTestDock::onRbH1OneTail2Toggled(bool checked) { if (!checked) return; ui.rbH0OneTail2->setChecked(true); m_test.tail = HypothesisTest::Test::Tail::Negative; } void HypothesisTestDock::onRbH1TwoTailToggled(bool checked) { if (!checked) return; ui.rbH0TwoTail->setChecked(true); m_test.tail = HypothesisTest::Test::Tail::Two; } /**************************************Helper Functions********************************************/ void HypothesisTestDock::countPartitions(Column *column, int &np, int &total_rows) { total_rows = column->rowCount(); np = 0; QString cell_value; QMap discovered_categorical_var; AbstractColumn::ColumnMode original_col_mode = 
column->columnMode(); column->setColumnMode(AbstractColumn::Text); for (int i = 0; i < total_rows; i++) { cell_value = column->textAt(i); if (cell_value.isEmpty()) { total_rows = i; break; } if (discovered_categorical_var[cell_value]) continue; discovered_categorical_var[cell_value] = true; np++; } column->setColumnMode(original_col_mode); } void HypothesisTestDock::setColumnsComboBoxModel(Spreadsheet* spreadsheet) { m_onlyValuesCols.clear(); m_twoCategoricalCols.clear(); m_multiCategoricalCols.clear(); for (auto* col : spreadsheet->children()) { if (col->columnMode() == AbstractColumn::Integer || col->columnMode() == AbstractColumn::Numeric) m_onlyValuesCols.append(col); else { int np = 0, n_rows = 0; countPartitions(col, np, n_rows); if (np <= 1) continue; else if (np == 2) m_twoCategoricalCols.append(col); else m_multiCategoricalCols.append(col); } } setColumnsComboBoxView(); showHypothesisTest(); } //TODO: change from if else to switch case: void HypothesisTestDock::setColumnsComboBoxView() { ui.cbCol1->clear(); ui.cbCol2->clear(); ui.cbCol3->clear(); QList::iterator i; switch (m_test.type) { case (HypothesisTest::Test::Type::ZTest): case (HypothesisTest::Test::Type::TTest): { switch (m_test.subtype) { case (HypothesisTest::Test::SubType::TwoSampleIndependent): { for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { ui.cbCol1->addItem( (*i)->name(), qint64(*i)); ui.cbCol2->addItem( (*i)->name(), qint64(*i)); } for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) ui.cbCol1->addItem( (*i)->name(), qint64(*i)); break; } case (HypothesisTest::Test::SubType::TwoSamplePaired): { for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) { ui.cbCol1->addItem( (*i)->name(), qint64(*i)); ui.cbCol2->addItem( (*i)->name(), qint64(*i)); } break; } case (HypothesisTest::Test::SubType::OneSample): { for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) ui.cbCol1->addItem( (*i)->name(), qint64(*i)); break; } 
case HypothesisTest::Test::SubType::OneWay: case HypothesisTest::Test::SubType::TwoWay: case HypothesisTest::Test::SubType::NoneSubType: break; } break; } case HypothesisTest::Test::Type::Anova: { switch (m_test.subtype) { case HypothesisTest::Test::SubType::OneWay: { for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) ui.cbCol2->addItem( (*i)->name(), qint64(*i)); for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) ui.cbCol1->addItem( (*i)->name(), qint64(*i)); for (i = m_multiCategoricalCols.begin(); i != m_multiCategoricalCols.end(); i++) ui.cbCol1->addItem( (*i)->name(), qint64(*i)); break; } case HypothesisTest::Test::SubType::TwoWay: { for (i = m_onlyValuesCols.begin(); i != m_onlyValuesCols.end(); i++) ui.cbCol2->addItem( (*i)->name(), qint64(*i)); for (i = m_twoCategoricalCols.begin(); i != m_twoCategoricalCols.end(); i++) { ui.cbCol1->addItem( (*i)->name(), qint64(*i)); ui.cbCol3->addItem( (*i)->name(), qint64(*i)); } for (i = m_multiCategoricalCols.begin(); i != m_multiCategoricalCols.end(); i++) { ui.cbCol1->addItem( (*i)->name(), qint64(*i)); ui.cbCol3->addItem( (*i)->name(), qint64(*i)); } break; } case HypothesisTest::Test::SubType::TwoSampleIndependent: case HypothesisTest::Test::SubType::TwoSamplePaired: case HypothesisTest::Test::SubType::OneSample: case HypothesisTest::Test::SubType::NoneSubType: break; } break; } case HypothesisTest::Test::Type::NoneType: break; } } bool HypothesisTestDock::nonEmptySelectedColumns() { - if (ui.cbCol1->isVisible() && ui.cbCol1->count() < 1) - return false; - if (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1) - return false; - if (ui.cbCol3->isVisible() && ui.cbCol3->count() < 1) + if ((ui.cbCol1->isVisible() && ui.cbCol1->count() < 1) || + (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1) || + (ui.cbCol3->isVisible() && ui.cbCol3->count() < 1)) return false; return true; } uint8_t HypothesisTestDock::setAllBits(const uint8_t& bits) { if (!bits) return 0; return ~(bits 
& (bits-1)); }