diff --git a/src/backend/generalTest/CorrelationCoefficient.cpp b/src/backend/generalTest/CorrelationCoefficient.cpp
index 43b0c0533..c71a99348 100644
--- a/src/backend/generalTest/CorrelationCoefficient.cpp
+++ b/src/backend/generalTest/CorrelationCoefficient.cpp
@@ -1,433 +1,437 @@
/***************************************************************************
File : CorrelationCoefficient.cpp
Project : LabPlot
Description : Finding Correlation Coefficient on data provided
--------------------------------------------------------------------
Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, *
* Boston, MA 02110-1301 USA *
* *
***************************************************************************/
#include "CorrelationCoefficient.h"
#include "GeneralTest.h"
#include "kdefrontend/generalTest/CorrelationCoefficientView.h"
#include "backend/spreadsheet/Spreadsheet.h"
#include "backend/core/column/Column.h"
#include "backend/lib/macros.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
extern "C" {
#include "backend/nsl/nsl_stats.h"
}
// Forwards the aspect name and the CorrelationCoefficient aspect type to the
// GeneralTest base class; the constructor body itself does nothing.
CorrelationCoefficient::CorrelationCoefficient(const QString& name)
	: GeneralTest(name, AspectType::CorrelationCoefficient) {}
// No explicit clean-up is performed in this destructor.
CorrelationCoefficient::~CorrelationCoefficient() = default;
// Entry point for running one correlation test.
// Resets every result of a previous run (statistics table, tooltips,
// coefficient, statistic and p-value lists, result lines), sets the title of
// the chosen test, dispatches to the matching perform*() helper and finally
// emits changed().
// test                - which of Pearson / Kendall / Spearman to run
// categoricalVariable - only forwarded to the Pearson test
void CorrelationCoefficient::performTest(Test test, bool categoricalVariable) {
//QDEBUG("in perform test");
// wipe the output of the previous run
m_statsTable = "";
m_tooltips.clear();
m_correlationValue = 0;
m_statisticValue.clear();
m_pValue.clear();
for (int i = 0; i < RESULTLINESCOUNT; i++)
m_resultLine[i]->clear();
// the title is assigned before the helper is called
switch (test) {
case CorrelationCoefficient::Test::Pearson: {
m_currTestName = "" + i18n("Pearson's r Correlation Test") + "
";
performPearson(categoricalVariable);
break;
}
case CorrelationCoefficient::Test::Kendall:
m_currTestName = "" + i18n("Kendall's Rank Correlation Test") + "
";
performKendall();
break;
case CorrelationCoefficient::Test::Spearman: {
m_currTestName = "" + i18n("Spearman Correlation Coefficient Test") + "
";
performSpearman();
break;
}
}
// emitted on every call, including when the helper stopped early with an error
emit changed();
}
// Read access to the stored coefficient; performTest() resets it to 0 before
// each run and the individual tests overwrite it.
double CorrelationCoefficient::correlationValue() const {
	return m_correlationValue;
}
// Returns a copy of the list of test statistics; performTest() clears the list
// before each run and the Kendall test appends its Z value to it.
QList CorrelationCoefficient::statisticValue() const{
return m_statisticValue;
}
// Returns a copy of the p-value list; performTest() clears it before each run.
// NOTE(review): none of the correlation tests visible in this file appends to
// it yet (see the "find p value" TODOs).
QList CorrelationCoefficient::pValue() const{
return m_pValue;
}
/***************************************************************************************************************************
* Private Implementations
* ************************************************************************************************************************/
/*********************************************Pearson r ******************************************************************/
// Formulae are taken from https://www.statisticssolutions.com/correlation-pearson-kendall-spearman/
// variables:
// N = total number of observations
// sumColx = sum of values in colx
// sumSqColx = sum of square of values in colx
// sumColxColy = sum of product of values in colx and coly
//TODO: support for col1 is categorical.
//TODO: add automatic test
//TODO: add tooltip for correlation value result
//TODO: find p value
void CorrelationCoefficient::performPearson(bool categoricalVariable) {
//QDEBUG("in pearson");
if (m_columns.count() != 2) {
printError("Select only 2 columns ");
return;
}
if (categoricalVariable) {
printLine(1, "currently categorical variable not supported", "blue");
return;
}
QString col1Name = m_columns[0]->name();
QString col2Name = m_columns[1]->name();
if (!isNumericOrInteger(m_columns[1])) {
printError("Column " + col2Name + " should contain only numeric or interger values");
}
int N = findCount(m_columns[0]);
if (N != findCount(m_columns[1])) {
printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal");
return;
}
double sumCol1 = findSum(m_columns[0], N);
double sumCol2 = findSum(m_columns[1], N);
double sumSqCol1 = findSumSq(m_columns[0], N);
double sumSqCol2 = findSumSq(m_columns[1], N);
double sumCol12 = 0;
for (int i = 0; i < N; i++)
sumCol12 += m_columns[0]->valueAt(i) *
m_columns[1]->valueAt(i);
// printing table;
// cell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader;
QList rowMajor;
int level = 0;
// horizontal header
QString sigma = UTF8_QSTRING("Σ");
rowMajor.append(new Cell("", level, true));
rowMajor.append(new Cell("N", level, true, "Total Number of Observations"));
rowMajor.append(new Cell(QString(sigma + "Scores"), level, true, "Sum of Scores in each column"));
rowMajor.append(new Cell(QString(sigma + "Scores2"), level, true, "Sum of Squares of scores in each column"));
rowMajor.append(new Cell(QString(sigma + "(" + UTF8_QSTRING("∏") + "Scores)"), level, true, "Sum of product of scores of both columns"));
//data with vertical header.
level++;
rowMajor.append(new Cell(col1Name, level, true));
rowMajor.append(new Cell(N, level));
rowMajor.append(new Cell(sumCol1, level));
rowMajor.append(new Cell(sumSqCol1, level));
rowMajor.append(new Cell(sumCol12, level, false, "", 2, 1));
level++;
rowMajor.append(new Cell(col2Name, level, true));
rowMajor.append(new Cell(N, level));
rowMajor.append(new Cell(sumCol2, level));
rowMajor.append(new Cell(sumSqCol2, level));
m_statsTable += getHtmlTable3(rowMajor);
m_correlationValue = (N * sumCol12 - sumCol1*sumCol2) /
sqrt((N * sumSqCol1 - gsl_pow_2(sumCol1)) *
(N * sumSqCol2 - gsl_pow_2(sumCol2)));
printLine(0, QString("Correlation Value is %1").arg(round(m_correlationValue)), "green");
}
/***********************************************Kendall ******************************************************************/
// used knight algorithm for fast performance O(nlogn) rather than O(n^2)
// http://adereth.github.io/blog/2013/10/30/efficiently-computing-kendalls-tau/
// TODO: Change date format type to original for numeric type;
// TODO: add tooltips.
// TODO: Compute tauB for ties.
// TODO: find P Value from Z Value
void CorrelationCoefficient::performKendall() {
+ QDEBUG("in perform kendall")
+
if (m_columns.count() != 2) {
printError("Select only 2 columns ");
return;
}
QString col1Name = m_columns[0]->name();
QString col2Name = m_columns[1]->name();
int N = findCount(m_columns[0]);
if (N != findCount(m_columns[1])) {
printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal");
+ QDEBUG("unequal number of rows")
return;
}
int col2Ranks[N];
if (isNumericOrInteger(m_columns[0]) || isNumericOrInteger(m_columns[1])) {
if (isNumericOrInteger(m_columns[0]) && isNumericOrInteger(m_columns[1])) {
for (int i = 0; i < N; i++)
col2Ranks[int(m_columns[0]->valueAt(i)) - 1] = int(m_columns[1]->valueAt(i));
} else {
printError(QString("Ranking System should be same for both Column: %1 and Column: %2 "
"Hint: Check for data types of columns").arg(col1Name).arg(col2Name));
+ QDEBUG("ranking system not same")
return;
}
} else {
AbstractColumn::ColumnMode origCol1Mode = m_columns[0]->columnMode();
AbstractColumn::ColumnMode origCol2Mode = m_columns[1]->columnMode();
m_columns[0]->setColumnMode(AbstractColumn::Text);
m_columns[1]->setColumnMode(AbstractColumn::Text);
QMap ValueToRank;
for (int i = 0; i < N; i++) {
if (ValueToRank[m_columns[0]->textAt(i)] != 0) {
printError("Currently ties are not supported");
m_columns[0]->setColumnMode(origCol1Mode);
m_columns[1]->setColumnMode(origCol2Mode);
return;
}
ValueToRank[m_columns[0]->textAt(i)] = i + 1;
}
for (int i = 0; i < N; i++)
col2Ranks[i] = ValueToRank[m_columns[1]->textAt(i)];
m_columns[0]->setColumnMode(origCol1Mode);
m_columns[1]->setColumnMode(origCol2Mode);
}
int nPossiblePairs = (N * (N - 1)) / 2;
int nDiscordant = findDiscordants(col2Ranks, 0, N - 1);
int nCorcordant = nPossiblePairs - nDiscordant;
- double m_correlationValue = double(nCorcordant - nDiscordant) / nPossiblePairs;
+ m_correlationValue = double(nCorcordant - nDiscordant) / nPossiblePairs;
m_statisticValue.append((3 * (nCorcordant - nDiscordant)) /
sqrt(N * (N- 1) * (2 * N + 5) / 2));
printLine(0 , QString("Number of Discordants are %1").arg(nDiscordant), "green");
printLine(1 , QString("Number of Concordant are %1").arg(nCorcordant), "green");
printLine(2 , QString("Tau a is %1").arg(round(m_correlationValue)), "green");
printLine(3 , QString("Z Value is %1").arg(round(m_statisticValue[0])), "green");
return;
}
/***********************************************Spearman ******************************************************************/
// All formulae and symbols are taken from: https://www.statisticshowto.datasciencecentral.com/spearman-rank-correlation-definition-calculate/
// Spearman rank correlation between the two selected columns.
// Each column is turned into a value -> rank table by convertToRanks(); the
// coefficient is then the covariance of the two rank series divided by the
// square root of the product of their variances. The result is stored in
// m_correlationValue and printed on result line 0.
// Requires exactly two columns with the same number of values.
void CorrelationCoefficient::performSpearman() {
if (m_columns.count() != 2) {
printError("Select only 2 columns ");
return;
}
QString col1Name = m_columns[0]->name();
QString col2Name = m_columns[1]->name();
int N = findCount(m_columns[0]);
if (N != findCount(m_columns[1])) {
printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal");
return;
}
// NOTE(review): convertToRanks() returns without filling the table when the
// column is not numeric; the lookups below then hit missing keys - confirm
// what rank that yields and whether an error should be reported instead.
QMap col1Ranks;
convertToRanks(m_columns[0], N, col1Ranks);
QMap col2Ranks;
convertToRanks(m_columns[1], N, col2Ranks);
double ranksCol1Mean = 0;
double ranksCol2Mean = 0;
// QString ranks1 = "";
// QString ranks2 = "";
// first pass: mean rank of each column
// NOTE(review): the lookup key is int(valueAt(i)), i.e. the fractional part
// of a value is dropped before the lookup - verify this matches the key type
// of the rank tables.
for (int i = 0; i < N; i++) {
ranksCol1Mean += col1Ranks[int(m_columns[0]->valueAt(i))];
ranksCol2Mean += col2Ranks[int(m_columns[1]->valueAt(i))];
// ranks1 += ", " + QString::number(col1Ranks[m_columns[0]->valueAt(i)]);
// ranks2 += ", " + QString::number(col2Ranks[m_columns[1]->valueAt(i)]);
}
ranksCol1Mean = ranksCol1Mean / N;
ranksCol2Mean = ranksCol2Mean / N;
//QDEBUG("ranks 1 and ranks2 are " );
//QDEBUG(ranks1);
//QDEBUG(ranks2);
//QDEBUG("Mean ranks are " << ranksCol1Mean << ranksCol2Mean);
// second pass: s12 = covariance of the ranks, s1 / s2 = their variances
double s12 = 0;
double s1 = 0;
double s2 = 0;
for (int i = 0; i < N; i++) {
double centeredRank_1 = col1Ranks[int(m_columns[0]->valueAt(i))] - ranksCol1Mean;
double centeredRank_2 = col2Ranks[int(m_columns[1]->valueAt(i))] - ranksCol2Mean;
s12 += centeredRank_1 * centeredRank_2;
s1 += gsl_pow_2(centeredRank_1);
s2 += gsl_pow_2(centeredRank_2);
}
s12 = s12 / N;
s1 = s1 / N;
s2 = s2 / N;
//QDEBUG("s12, s1, s2 are " << s12 << " " << s1 << " " << s2);
// NOTE(review): there is no guard for N == 0 or for s1 * s2 == 0 before the
// divisions above and below.
m_correlationValue = s12 / std::sqrt(s1 * s2);
printLine(0, QString("Spearman Rank Correlation value is %1").arg(m_correlationValue), "green");
}
/***********************************************Helper Functions******************************************************************/
// Counts the discordant pairs (inversions) in ranks[start..end], both bounds
// inclusive, i.e. the pairs i < j with ranks[i] > ranks[j].
//
// Works like a merge sort: both halves are handled recursively, then the
// pairs spanning the two halves are counted and the halves are merged.
// Side effect: ranks[start..end] is sorted in ascending order on return.
// Equal ranks are never counted as discordant, and unlike a merge that only
// handles "<" and ">", no element is dropped when ties are present.
//
// ranks - array to examine; reordered in place
// start - index of the first element of the range
// end   - index of the last element of the range
// Returns the number of discordant pairs, or 0 when the range holds fewer than
// two elements.
int CorrelationCoefficient::findDiscordants(int *ranks, int start, int end) {
	if (start >= end)
		return 0;

	const int mid = start + (end - start) / 2;
	const int leftDiscordants = findDiscordants(ranks, start, mid);
	const int rightDiscordants = findDiscordants(ranks, mid + 1, end);

	int* const first = ranks + start;
	int* const middle = ranks + mid + 1;	// first element of the right half
	int* const last = ranks + end + 1;	// one past the last element

	// Both halves are sorted now. For every right-hand element, all left-hand
	// elements that are strictly greater form a discordant pair with it.
	// leftPos only moves forward because the right half is ascending as well.
	int mergeDiscordants = 0;
	const int* leftPos = first;
	for (const int* rightPos = middle; rightPos != last; ++rightPos) {
		while (leftPos != middle && *leftPos <= *rightPos)
			++leftPos;
		mergeDiscordants += static_cast<int>(middle - leftPos);
	}

	// merge the two sorted halves; needs no variable-length stack arrays
	std::inplace_merge(first, middle, last);

	return leftDiscordants + rightDiscordants + mergeDiscordants;
}
// Builds a value -> rank table for the first N values of a numeric column.
// The values are sorted in descending order, so the largest value gets rank 1.
// col   - column to rank; nothing happens (ranks is left untouched) when it is
//         not numeric or integer
// N     - number of values to read from col
// ranks - output table; cleared and refilled
void CorrelationCoefficient::convertToRanks(const Column* col, int N, QMap &ranks) {
if (!isNumericOrInteger(col))
return;
//QDEBUG("in convert to ranks");
// temporary copy of the values, released at the end of the function
double* sortedList = new double[N];
for (int i = 0; i < N; i++)
sortedList[i] = col->valueAt(i);
std::sort(sortedList, sortedList + N, std::greater());
// QString debug_sortedList = "";
ranks.clear();
// a value that occurs several times ends up with the rank of its last
// position in the sorted list, because later assignments overwrite earlier ones
for (int i = 0; i < N; i++) {
ranks[sortedList[i]] = i + 1;
// debug_sortedList += ", " + QString::number(sortedList[i]);
}
//QDEBUG("sorted list is " << debug_sortedList);
delete[] sortedList;
}
// Convenience overload: ranks all values of the column, using findCount(col)
// as the number of values.
void CorrelationCoefficient::convertToRanks(const Column* col, QMap &ranks) {
convertToRanks(col, findCount(col), ranks);
}
/***********************************************Virtual Functions******************************************************************/
// Returns the widget showing this test. The CorrelationCoefficientView is
// created on the first call and stored in m_view / m_partView; later calls
// return the stored widget.
QWidget* CorrelationCoefficient::view() const {
if (!m_partView) {
// the method is const but the view is constructed from a non-const pointer to this object
m_view = new CorrelationCoefficientView(const_cast(this));
m_partView = m_view;
}
return m_partView;
}
diff --git a/src/backend/generalTest/CorrelationCoefficient.h b/src/backend/generalTest/CorrelationCoefficient.h
index 24af099db..98adcde02 100644
--- a/src/backend/generalTest/CorrelationCoefficient.h
+++ b/src/backend/generalTest/CorrelationCoefficient.h
@@ -1,78 +1,71 @@
/***************************************************************************
File : CorrelationCoefficient.h
Project : LabPlot
Description : Finding Correlation Coefficient on data provided
--------------------------------------------------------------------
Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, *
* Boston, MA 02110-1301 USA *
* *
***************************************************************************/
#ifndef CORRELATIONCOEFFICIENT_H
#define CORRELATIONCOEFFICIENT_H
-#include "backend/core/AbstractPart.h"
#include "GeneralTest.h"
-#include "backend/lib/macros.h"
class CorrelationCoefficientView;
-class Spreadsheet;
-class QString;
-class Column;
-class QVBoxLayout;
-class QLabel;
// Test aspect that computes a correlation coefficient (Pearson, Kendall or
// Spearman); derives from GeneralTest.
class CorrelationCoefficient : public GeneralTest {
Q_OBJECT
public:
explicit CorrelationCoefficient(const QString& name);
~CorrelationCoefficient() override;
// the correlation tests performTest() can run
enum Test{
Pearson,
Kendall,
Spearman
};
// results of the most recent performTest() call
double correlationValue() const;
QList statisticValue() const;
QList pValue() const;
QWidget* view() const override;
// runs the given test; categoricalVariable is only passed on to the Pearson test
- void performTest(Test m_test, bool categoricalVariable = true);
+ void performTest(Test m_test, bool categoricalVariable = false);
private:
// one implementation per test
void performPearson(bool categoricalVariable);
void performKendall();
void performSpearman();
// helpers: discordant-pair count for Kendall, value -> rank tables for Spearman
int findDiscordants(int* ranks, int start, int end);
void convertToRanks(const Column* col, int N, QMap &ranks);
void convertToRanks(const Column* col, QMap &ranks);
double m_correlationValue;
QList m_statisticValue;
QList m_pValue;
};
#endif // CORRELATIONCOEFFICIENT_H
diff --git a/src/backend/generalTest/HypothesisTest.cpp b/src/backend/generalTest/HypothesisTest.cpp
index 16057794b..4ab760228 100644
--- a/src/backend/generalTest/HypothesisTest.cpp
+++ b/src/backend/generalTest/HypothesisTest.cpp
@@ -1,1143 +1,1143 @@
/***************************************************************************
File : HypothesisTest.cpp
Project : LabPlot
Description : Doing Hypothesis-Test on data provided
--------------------------------------------------------------------
Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, *
* Boston, MA 02110-1301 USA *
* *
***************************************************************************/
#include "HypothesisTest.h"
#include "kdefrontend/generalTest/HypothesisTestView.h"
#include "backend/spreadsheet/Spreadsheet.h"
#include "backend/core/column/Column.h"
#include "backend/lib/macros.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
extern "C" {
#include "backend/nsl/nsl_stats.h"
}
// Forwards the aspect name and the HypothesisTest aspect type to the
// GeneralTest base class; the constructor body itself does nothing.
HypothesisTest::HypothesisTest(const QString& name)
	: GeneralTest(name, AspectType::HypothesisTest) {}
// No explicit clean-up is performed in this destructor.
HypothesisTest::~HypothesisTest() = default;
// Stores the population mean that the one-sample test compares the sample
// mean against. The QVariant is converted with toDouble().
//
// The parameter has the same name as the member and therefore hides it inside
// this function. Without the explicit this-> the assignment would only write
// to the parameter and the member would never change.
void HypothesisTest::setPopulationMean(QVariant m_populationMean) {
	this->m_populationMean = m_populationMean.toDouble();
}
// Stores the significance level the p-values are compared against; the
// QVariant is converted with toDouble().
void HypothesisTest::setSignificanceLevel(QVariant alpha) {
	const double level = alpha.toDouble();
	m_significanceLevel = level;
}
// Entry point for running one hypothesis test.
// Remembers the requested tail type, discards the results of the previous run,
// then sets a default title and calls the helper that belongs to
// test.subtype. changed() is emitted in every case, also for NoneSubType.
// test                - tail, type and subtype of the test to run
// categoricalVariable - forwarded to the two sample independent test
// equalVariance       - forwarded to the two sample independent test
void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) {
	m_tailType = test.tail;

	// discard the output of the previous run
	m_statsTable = "";
	m_tooltips.clear();
	m_statisticValue.clear();
	m_pValue.clear();
	for (int line = 0; line < RESULTLINESCOUNT; ++line)
		m_resultLine[line]->clear();

	// the title is assigned before the helper runs, so a helper may replace it
	switch (test.subtype) {
	case HypothesisTest::Test::SubType::TwoSampleIndependent:
		m_currTestName = "" + i18n("Two Sample Independent Test") + "";
		performTwoSampleIndependentTest(test.type, categoricalVariable, equalVariance);
		break;
	case HypothesisTest::Test::SubType::TwoSamplePaired:
		m_currTestName = "" + i18n("Two Sample Paired Test") + "";
		performTwoSamplePairedTest(test.type);
		break;
	case HypothesisTest::Test::SubType::OneSample:
		m_currTestName = "" + i18n("One Sample Test") + "";
		performOneSampleTest(test.type);
		break;
	case HypothesisTest::Test::SubType::OneWay:
		m_currTestName = "" + i18n("One Way Anova") + "";
		performOneWayAnova();
		break;
	case HypothesisTest::Test::SubType::TwoWay:
		m_currTestName = "" + i18n("Two Way Anova") + "";
		performTwoWayAnova();
		break;
	case HypothesisTest::Test::SubType::NoneSubType:
		break;
	}

	emit changed();
}
// Runs Levene's test for equality of variance.
// Discards the results of the previous run, sets the title, delegates the
// computation to m_performLeveneTest() and emits changed().
void HypothesisTest::performLeveneTest(bool categoricalVariable) {
	// discard the output of the previous run
	m_statsTable = "";
	m_tooltips.clear();
	m_statisticValue.clear();
	m_pValue.clear();
	int line = 0;
	while (line < RESULTLINESCOUNT) {
		m_resultLine[line]->clear();
		++line;
	}

	m_currTestName = "" + i18n("Levene Test for Equality of Variance") + "";
	m_performLeveneTest(categoricalVariable);

	emit changed();
}
// Returns a reference to the list of test statistics (not a copy), so callers
// see and can modify the list owned by this object. The list is cleared at the
// start of performTest() and performLeveneTest().
QList& HypothesisTest::statisticValue(){
return m_statisticValue;
}
// Returns a reference to the list of p-values (not a copy), so callers see and
// can modify the list owned by this object. The list is cleared at the start
// of performTest() and performLeveneTest().
QList& HypothesisTest::pValue(){
return m_pValue;
}
/******************************************************************************
* Private Implementations
* ****************************************************************************/
//TODO: backend of z test;
//TODO: add tooltip to tables. (currently it is not possible to use with QTextDocument);
//TODO: use https://www.gnu.org/software/gsl/doc/html/statistics.html for basic statistic calculations
/**************************Two Sample Independent *************************************/
// Two sample independent T or Z test on the two selected columns.
//
// Two input layouts are handled:
//  - both columns hold the samples directly (categoricalVariable is false and
//    column 0 is numeric), or
//  - column 0 holds a category with exactly two classes and column 1 the
//    values, which are then split by class.
// Count, sum, mean and standard deviation of both samples are shown in the
// statistics table; the statistic is appended to m_statisticValue and the
// p-value from getPValue() to m_pValue.
//
// test                - TTest or ZTest; Anova and NoneType compute nothing
// categoricalVariable - treat column 0 as the grouping variable
// equalVariance       - T test only: pooled variance (true) or the
//                       unequal-variance form (false)
void HypothesisTest::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) {
if (m_columns.size() != 2) {
printError("Inappropriate number of m_columns selected");
return;
}
int n[2];
double sum[2], mean[2], std[2];
QString col1Name = m_columns[0]->name();
QString col2Name = m_columns[1]->name();
if (!categoricalVariable && isNumericOrInteger(m_columns[0])) {
for (int i = 0; i < 2; i++) {
findStats(m_columns[i], n[i], sum[i], mean[i], std[i]);
// n - 1 is used as a divisor further down, so a single value is not enough
if (n[i] < 2) {
- printError("Atleast two values should be there in every column");
+ printError("At least two values should be there in every column");
return;
}
- if (std[i] <= 0) {
- printError(i18n("Standard Deviation of atleast one column is equal to 0: last column is: %1", m_columns[i]->name()));
// gsl_fcmp() returns 0 when the two values are approximately equal and -1 / +1
// otherwise, so "equal to zero" has to be tested as == 0
+ if (gsl_fcmp(std[i], 0., 1.e-16) == 0) {
+ printError(i18n("Standard Deviation of at least one column is equal to 0: last column is: %1", m_columns[i]->name()));
return;
}
}
} else {
QMap colName;
QString baseColName;
int np;
int totalRows;
countPartitions(m_columns[0], np, totalRows);
if (np != 2) {
printError( i18n("Number of Categorical Variable in Column %1 is not equal to 2", m_columns[0]->name()));
return;
}
if (isNumericOrInteger(m_columns[0]))
baseColName = m_columns[0]->name();
ErrorType errorCode = findStatsCategorical(m_columns[0], m_columns[1], n, sum, mean, std, colName, np, totalRows);
switch (errorCode) {
case ErrorUnqualSize: {
printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name()));
return;
}
case ErrorEmptyColumn: {
printError("At least one of selected column is empty");
return;
}
case NoError:
break;
}
// name the two samples after their class; the class mapped to 1 is sample 1
QMapIterator i(colName);
while (i.hasNext()) {
i.next();
if (i.value() == 1)
col1Name = baseColName + " " + i.key();
else
col2Name = baseColName + " " + i.key();
}
}
QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std",
col1Name, n[0], sum[0], mean[0], std[0],
col2Name, n[1], sum[1], mean[1], std[1]
};
m_statsTable = getHtmlTable(3, 5, rowMajor);
// the same checks again, this time also covering the categorical path
for (int i = 0; i < 2; i++) {
if (n[i] < 2) {
- printError("Atleast two values should be there in every column");
+ printError("At least two values should be there in every column");
return;
}
- if (std[i] <= 0) {
- printError( i18n("Standard Deviation of atleast one column is equal to 0: last column is: %1", m_columns[i]->name()));
// see above: gsl_fcmp() == 0 means "approximately equal"
+ if (gsl_fcmp(std[i], 0., 1.e-16) == 0) {
+ printError( i18n("Standard Deviation of at least one column is equal to 0: last column is: %1", m_columns[i]->name()));
return;
}
}
QString testName;
int df = 0;
double sp = 0;
switch (test) {
case HypothesisTest::Test::Type::TTest: {
testName = "T";
if (equalVariance) {
// pooled standard deviation with n0 + n1 - 2 degrees of freedom
df = n[0] + n[1] - 2;
sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) +
(n[1]-1) * gsl_pow_2(std[1]) ) / df );
m_statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1])));
printLine(9, "Assumption: Equal Variance b/w both population means");
} else {
// unequal variances: degrees of freedom are derived from both sample
// variances and rounded to the nearest integer
double temp_val;
temp_val = gsl_pow_2( gsl_pow_2(std[0]) / n[0] + gsl_pow_2(std[1]) / n[1]);
temp_val = temp_val / ( (gsl_pow_2( (gsl_pow_2(std[0]) / n[0]) ) / (n[0]-1)) +
(gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1)));
df = qRound(temp_val);
m_statisticValue.append((mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) +
(gsl_pow_2(std[1])/n[1]))));
printLine(9, "Assumption: UnEqual Variance b/w both population means");
}
printLine(8, "Assumption: Both Populations approximately follow normal distribution");
break;
}
case HypothesisTest::Test::Type::ZTest: {
testName = "Z";
// df is still 0 in this branch and must not be used as the divisor; the
// pooled standard deviation divides by n0 + n1 - 2 as in the T test above
sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / (n[0] + n[1] - 2));
m_statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1])));
// m_pValue.append(gsl_cdf_gaussian_P(m_statisticValue, sp));
break;
}
case HypothesisTest::Test::Type::Anova:
case HypothesisTest::Test::Type::NoneType:
break;
}
// NOTE(review): for Anova / NoneType nothing was appended above, yet
// m_statisticValue[0] is read below - confirm callers never pass these types.
m_currTestName = "" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "";
m_pValue.append(getPValue(test, m_statisticValue[0], col1Name, col2Name, (mean[0] - mean[1]), sp, df));
printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue");
printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green");
printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName));
printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green");
printLine(6, i18n("Degree of Freedom is %1", df), "green");
printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate"));
if (m_pValue[0] <= m_significanceLevel)
printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(m_significanceLevel)));
else
printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
return;
}
/********************************Two Sample Paired ***************************************/
void HypothesisTest::performTwoSamplePairedTest(HypothesisTest::Test::Type test) {
if (m_columns.size() != 2) {
printError("Inappropriate number of m_columns selected");
return;
}
for (int i = 0; i < 2; i++) {
if ( !isNumericOrInteger(m_columns[0])) {
printError("select only m_columns with numbers");
return;
}
}
int n;
double sum, mean, std;
ErrorType errorCode = findStatsPaired(m_columns[0], m_columns[1], n, sum, mean, std);
switch (errorCode) {
case ErrorUnqualSize: {
printError("both m_columns are having different sizes");
return;
}
case ErrorEmptyColumn: {
printError("m_columns are empty");
return;
}
case NoError:
break;
}
QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std",
"difference", n, sum, mean, std
};
m_statsTable = getHtmlTable(2, 5, rowMajor);
- if (std <= 0) {
+ if (gsl_fcmp(std, 0., 1.e-16)) {
printError("Standard deviation of the difference is 0");
return;
}
QString testName;
int df = 0;
switch (test) {
case HypothesisTest::Test::Type::TTest: {
m_statisticValue[0] = mean / (std / qSqrt(n));
df = n - 1;
testName = "T";
printLine(6, i18n("Degree of Freedom is %1 |
name(), i18n("%1", m_populationMean), mean, std, df));
m_currTestName = "" + i18n("One Sample %1 Test for %2 vs %3", testName, m_columns[0]->name(), m_columns[1]->name()) + "
";
printLine(2, i18n("Significance level is %1 ", round(m_significanceLevel)), "blue");
printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green");
printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green");
if (m_pValue[0] <= m_significanceLevel)
printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel));
else
printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
return;
}
/******************************** One Sample ***************************************/
// One sample T or Z test: compares the mean of the single selected numeric
// column with m_populationMean.
// Count, sum, mean and standard deviation are shown in the statistics table;
// the statistic (mean - m_populationMean) / (std / sqrt(n)) is appended to
// m_statisticValue and the p-value from getPValue() to m_pValue.
// test - TTest or ZTest; Anova and NoneType compute nothing
void HypothesisTest::performOneSampleTest(HypothesisTest::Test::Type test) {
if (m_columns.size() != 1) {
printError("Inappropriate number of m_columns selected");
return;
}
if ( !isNumericOrInteger(m_columns[0])) {
printError("select only m_columns with numbers");
return;
}
int n;
double sum, mean, std;
ErrorType errorCode = findStats(m_columns[0], n, sum, mean, std);
switch (errorCode) {
case ErrorEmptyColumn: {
printError("column is empty");
return;
}
case NoError:
break;
case ErrorUnqualSize: {
return;
}
}
QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std",
m_columns[0]->name(), n, sum, mean, std
};
m_statsTable = getHtmlTable(2, 5, rowMajor);
- if (std <= 0) {
// gsl_fcmp() returns 0 when the two values are approximately equal and -1 / +1
// otherwise, so "equal to zero" has to be tested as == 0; testing the raw
// return value would reject every column with a non-zero deviation
+ if (gsl_fcmp(std, 0., 1.e-16) == 0) {
printError("Standard deviation is 0");
return;
}
QString testName;
int df = 0;
switch (test) {
case HypothesisTest::Test::Type::TTest: {
testName = "T";
m_statisticValue.append((mean - m_populationMean) / (std / qSqrt(n)));
df = n - 1;
printLine(6, i18n("Degree of Freedom is %1", df), "blue");
break;
}
case HypothesisTest::Test::Type::ZTest: {
testName = "Z";
df = 0;
m_statisticValue.append((mean - m_populationMean) / (std / qSqrt(n)));
break;
}
case HypothesisTest::Test::Type::Anova:
case HypothesisTest::Test::Type::NoneType:
break;
}
// NOTE(review): for Anova / NoneType nothing was appended above, yet
// m_statisticValue[0] is read below - confirm callers never pass these types.
m_pValue.append(getPValue(test, m_statisticValue[0], m_columns[0]->name(), i18n("%1",m_populationMean), mean - m_populationMean, std, df));
m_currTestName = "" + i18n("One Sample %1 Test for %2", testName, m_columns[0]->name()) + "
";
printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue");
printLine(4, i18n("%1 Value is %2", testName, round(m_statisticValue[0])), "green");
printLine(5, i18n("P Value is %1", m_pValue[0]), "green");
if (m_pValue[0] <= m_significanceLevel)
printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel));
else
printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
return;
}
/*************************************One Way Anova***************************************/
// all standard variables and formulas are taken from this wikipedia page:
// https://en.wikipedia.org/wiki/One-way_analysis_of_variance
// b stands for b/w groups
// w stands for within groups
// np is number of partition i.e., number of classes
void HypothesisTest::performOneWayAnova() {
int np, totalRows;
countPartitions(m_columns[0], np, totalRows);
int* ni = new int[np];
double* sum = new double[np];
double* mean = new double[np];
double* std = new double[np];
QString* colNames = new QString[np];
QMap classnameToIndex;
QString baseColName;
if (isNumericOrInteger(m_columns[0]))
baseColName = m_columns[0]->name();
findStatsCategorical(m_columns[0], m_columns[1], ni, sum, mean, std, classnameToIndex, np, totalRows);
double yBar = 0; // overall mean
double sB = 0; // sum of squares of (mean - overall_mean) between the groups
int fB = 0; // degree of freedom between the groups
double msB = 0; // mean sum of squares between the groups
double sW = 0; // sum of squares of (value - mean of group) within the groups
int fW = 0; // degree of freedom within the group
double msW = 0; // mean sum of squares within the groups
// now finding mean of each group;
for (int i = 0; i < np; i++)
yBar += mean[i];
yBar = yBar / np;
for (int i = 0; i < np; i++) {
sB += ni[i] * gsl_pow_2( ( mean[i] - yBar));
if (ni[i] > 1)
sW += gsl_pow_2( std[i])*(ni[i] - 1);
else
sW += gsl_pow_2( std[i]);
fW += ni[i] - 1;
}
fB = np - 1;
msB = sB / fB;
msW = sW / fW;
m_statisticValue.append(msB / msW);
m_pValue.append(nsl_stats_fdist_p(m_statisticValue[0], static_cast(np-1), fW));
QMapIterator i(classnameToIndex);
while (i.hasNext()) {
i.next();
colNames[i.value()-1] = baseColName + " " + i.key();
}
// now printing the statistics and result;
int rowCount = np + 1, columnCount = 5;
QVariant* rowMajor = new QVariant[rowCount*columnCount];
// header data;
rowMajor[0] = "";
rowMajor[1] = "Ni";
rowMajor[2] = "Sum";
rowMajor[3] = "Mean";
rowMajor[4] = "Std";
// table data
for (int row_i = 1; row_i < rowCount ; row_i++) {
rowMajor[row_i*columnCount] = colNames[row_i - 1];
rowMajor[row_i*columnCount + 1] = ni[row_i - 1];
rowMajor[row_i*columnCount + 2] = sum[row_i - 1];
rowMajor[row_i*columnCount + 3] = mean[row_i - 1];
rowMajor[row_i*columnCount + 4] = std[row_i - 1];
}
m_statsTable = "" + i18n("Group Summary Statistics") + "
";
m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor);
m_statsTable += getLine("");
m_statsTable += getLine("");
m_statsTable += "" + i18n("Grand Summary Statistics") + "
";
m_statsTable += getLine("");
m_statsTable += getLine(i18n("Overall Mean is %1", round(yBar)));
rowCount = 4;
columnCount = 3;
rowMajor->clear();
rowMajor[0] = "";
rowMajor[1] = "Between Groups";
rowMajor[2] = "Within Groups";
int baseIndex = 0;
baseIndex = 1 * columnCount;
rowMajor[baseIndex + 0] = "Sum of Squares";
rowMajor[baseIndex + 1] = sB;
rowMajor[baseIndex + 2] = sW;
baseIndex = 2 * columnCount;
rowMajor[baseIndex + 0] = "Degree of Freedom";
rowMajor[baseIndex + 1] = fB;
rowMajor[baseIndex + 2] = fW;
baseIndex = 3 * columnCount;
rowMajor[baseIndex + 0] = "Mean Square Value";
rowMajor[baseIndex + 1] = msB;
rowMajor[baseIndex + 2] = msW;
m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor);
delete[] ni;
delete[] sum;
delete[] mean;
delete[] std;
delete[] colNames;
printLine(1, i18n("F Value is %1", round(m_statisticValue[0])), "green");
printLine(2, i18n("P Value is %1 ", m_pValue[0]), "green");
if (m_pValue[0] <= m_significanceLevel)
printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel));
else
printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true"));
return;
}
/*************************************Two Way Anova***************************************/
// all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf
//TODO: suppress the compiler warning that variable-length arrays are a C99 feature.
//TODO: add assumptions verification option
//TODO: add tail option (if needed)
// Performs a two-way ANOVA with replication.
// m_columns[0] and m_columns[1] are read as category labels (textAt) and m_columns[2] as the
// measured value (valueAt). The function builds a contingency table and an SS/DF/MS results
// table into m_statsTable, prints the F and P values with printLine() and appends them to
// m_statisticValue and m_pValue. On invalid input it calls printError() and returns early.
// NOTE(review): several declarations below (QMap, QMapIterator, QList, static_cast) carry no
// template arguments in this text; they look stripped - confirm against the repository.
void HypothesisTest::performTwoWayAnova() {
// number of distinct categories and number of rows for each of the two factor columns
int np_a, totalRows_a;
int np_b, totalRows_b;
countPartitions(m_columns[0], np_a, totalRows_a);
countPartitions(m_columns[1], np_b, totalRows_b);
// per-cell accumulators (variable length arrays): groupMean first holds the sum of the
// values of a cell and is divided by the replicate count further below
double groupMean[np_a][np_b];
int replicates[np_a][np_b];
for (int i = 0; i < np_a; i++)
for (int j = 0; j < np_b; j++) {
groupMean[i][j] = 0;
replicates[i][j] = 0;
}
if (totalRows_a != totalRows_b) {
- printError("There is missing data in atleast one of the rows");
+ printError("There is missing data in at least one of the rows");
return;
}
// category name -> 1-based index; the lookups below treat the value 0 as "not seen yet"
QMap catToNumber_a;
QMap catToNumber_b;
int partitionNumber_a = 1;
int partitionNumber_b = 1;
// first pass: assign an index to every new category and accumulate sum and count per cell
for (int i = 0; i < totalRows_a; i++) {
QString name_a = m_columns[0]->textAt(i);
QString name_b = m_columns[1]->textAt(i);
double value = m_columns[2]->valueAt(i);
if (catToNumber_a[name_a] == 0) {
catToNumber_a[name_a] = partitionNumber_a;
partitionNumber_a++;
}
if (catToNumber_b[name_b] == 0) {
catToNumber_b[name_b] = partitionNumber_b;
partitionNumber_b++;
}
groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += value;
replicates[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += 1;
}
// every cell must be non-empty and hold the same number of values as cell (0, 0);
// the cell sums are turned into cell means in the same loop
int replicate = replicates[0][0];
for (int i = 0; i < np_a; i++)
for (int j = 0; j < np_b; j++) {
if (replicates[i][j] == 0) {
- printError("Dataset should have atleast one data value corresponding to each feature combination");
+ printError("Dataset should have at least one data value corresponding to each feature combination");
return;
}
if (replicates[i][j] != replicate) {
printError("Number of experiments perfomed for each combination of levels
"
"between Independet Var.1 and Independent Var.2 must be equal");
return;
}
groupMean[i][j] /= replicates[i][j];
}
// second pass: within-cell sum of squares (value minus the mean of its cell)
double ss_within = 0;
for (int i = 0; i < totalRows_a; i++) {
QString name_a = m_columns[0]->textAt(i);
QString name_b = m_columns[1]->textAt(i);
double value = m_columns[2]->valueAt(i);
ss_within += gsl_pow_2(value - groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1]);
}
// NOTE(review): df_within is 0 when replicate == 1, which makes ms_within a division by zero
int df_within = (replicate - 1) * np_a * np_b;
double ms_within = ss_within / df_within;
// marginal means of factor A (rows) and factor B (columns)
// NOTE(review): both arrays are allocated with plain new[] and then accumulated with +=;
// no initialisation to zero is visible in this function - confirm.
double* mean_a = new double[np_a];
double* mean_b = new double[np_b];
for (int i = 0; i < np_a; i++) {
for (int j = 0; j < np_b; j++) {
mean_a[i] += groupMean[i][j] / np_b;
mean_b[j] += groupMean[i][j] / np_a;
}
}
// grand mean, computed as the average of the factor-A means
double mean = 0;
for (int i = 0; i < np_a; i++)
mean += mean_a[i] / np_a;
// factor A: sum of squares, degrees of freedom, mean square
double ss_a = 0;
for (int i = 0; i < np_a; i++)
ss_a += gsl_pow_2(mean_a[i] - mean);
ss_a *= replicate * np_b;
int df_a = np_a - 1;
double ms_a = ss_a / df_a;
// factor B: sum of squares, degrees of freedom, mean square
double ss_b = 0;
for (int i = 0; i < np_b; i++)
ss_b += gsl_pow_2(mean_b[i] - mean);
ss_b *= replicate * np_a;
int df_b = np_b - 1;
double ms_b = ss_b / df_b;
// interaction A x B: sum of squares, degrees of freedom, mean square
double ss_interaction = 0;
for (int i = 0; i < np_a; i++)
for (int j = 0; j < np_b; j++)
ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean);
ss_interaction *= replicate;
int df_interaction = (np_a - 1) * (np_b - 1);
double ms_interaction = ss_interaction / df_interaction;
// invert the name -> index maps into arrays indexed by (index - 1)
QString* partitionNames_a = new QString[np_a];
QString* partitionNames_b = new QString[np_b];
QMapIterator itr_a(catToNumber_a);
while (itr_a.hasNext()) {
itr_a.next();
partitionNames_a[itr_a.value()-1] = itr_a.key();
}
QMapIterator itr_b(catToNumber_b);
while (itr_b.hasNext()) {
itr_b.next();
partitionNames_b[itr_b.value()-1] = itr_b.key();
}
// printing table;
// cell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader;
// NOTE(review): the calls below pass (data, level, bool, string, int, int), which does not
// match the order listed in the comment above - check the declaration of Cell.
// NOTE(review): the Cell objects are created with new and no delete is visible in this
// function (rowMajor.clear() only drops the pointers); confirm whether getHtmlTable3()
// takes ownership.
QList rowMajor;
// contingency table, header row 0: empty corner cell, one cell per factor-B level, "Mean"
rowMajor.append(new Cell("", 0, true, "", 2, 1));
for (int i = 0; i < np_b; i++)
rowMajor.append(new Cell(partitionNames_b[i], 0, true, "", 1, 2));
rowMajor.append(new Cell("Mean", 0, true, "", 2));
// header row 1: a "Mean" / "Replicate" pair below every factor-B level
for (int i = 0; i < np_b; i++) {
rowMajor.append(new Cell("Mean", 1, true));
rowMajor.append(new Cell("Replicate", 1, true));
}
// one data row per factor-A level: cell mean and replicate count per factor-B level,
// followed by the factor-A mean
int level = 2;
for (int i = 0; i < np_a; i++) {
rowMajor.append(new Cell(partitionNames_a[i], level, true));
for (int j = 0; j < np_b; j++) {
rowMajor.append(new Cell(round(groupMean[i][j]), level));
rowMajor.append(new Cell(replicates[i][j], level));
}
rowMajor.append(new Cell(round(mean_a[i]), level));
level++;
}
// last row: factor-B means and the grand mean
rowMajor.append(new Cell("Mean", level, true));
for (int i = 0; i < np_b; i++)
rowMajor.append(new Cell(round(mean_b[i]), level, false, "", 1, 2));
rowMajor.append(new Cell(round(mean), level));
m_statsTable = "" + i18n("Contingency Table") + "";
m_statsTable += getHtmlTable3(rowMajor);
m_statsTable += "";
m_statsTable += "" + i18n("results table") + "";
// results table: header plus one row each for factor A, factor B, interaction and within
rowMajor.clear();
level = 0;
rowMajor.append(new Cell("", level, true));
rowMajor.append(new Cell("SS", level, true));
rowMajor.append(new Cell("DF", level, true, "degree of freedom"));
rowMajor.append(new Cell("MS", level, true));
level++;
rowMajor.append(new Cell(m_columns[0]->name(), level, true));
rowMajor.append(new Cell(round(ss_a), level));
rowMajor.append(new Cell(df_a, level));
rowMajor.append(new Cell(round(ms_a), level));
level++;
rowMajor.append(new Cell(m_columns[1]->name(), level, true));
rowMajor.append(new Cell(round(ss_b), level));
rowMajor.append(new Cell(df_b, level));
rowMajor.append(new Cell(round(ms_b), level));
level++;
rowMajor.append(new Cell("Interaction", level, true));
rowMajor.append(new Cell(round(ss_interaction), level));
rowMajor.append(new Cell(df_interaction, level));
rowMajor.append(new Cell(round(ms_interaction), level));
level++;
rowMajor.append(new Cell("Within", level, true));
rowMajor.append(new Cell(round(ss_within), level));
rowMajor.append(new Cell(df_within, level));
rowMajor.append(new Cell(round(ms_within), level));
m_statsTable += getHtmlTable3(rowMajor);
// F statistics: each mean square divided by the within mean square
double fValue_a = ms_a / ms_within;
double fValue_b = ms_b / ms_within;
double fValue_interaction = ms_interaction / ms_within;
// NOTE(review): the two degrees of freedom passed here are (np_a - 1, df_a) and
// (np_b - 1, df_b), i.e. the same number twice, while the printed labels refer to
// "dfwithin"; df_within is not passed - confirm against nsl_stats_fdist_p().
double m_pValue_a = nsl_stats_fdist_p(fValue_a, static_cast(np_a - 1), df_a);
double m_pValue_b = nsl_stats_fdist_p(fValue_b, static_cast(np_b - 1), df_b);
// output lines 0-2 show the F values, lines 4-5 the P values; line 3 is not written here
printLine(0, "F(df" + m_columns[0]->name() + ", dfwithin) is " + round(fValue_a), "blue");
printLine(1, "F(df" + m_columns[1]->name() + ", dfwithin) is " + round(fValue_b), "blue");
printLine(2, "F(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue");
printLine(4, "P(df" + m_columns[0]->name() + ", dfwithin) is " + round(m_pValue_a), "blue");
printLine(5, "P(df" + m_columns[1]->name() + ", dfwithin) is " + round(m_pValue_b), "blue");
// printLine(2, "P(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue");
// three F values are stored but only two P values: no P value is computed for the interaction
m_statisticValue.append(fValue_a);
m_statisticValue.append(fValue_b);
m_statisticValue.append(fValue_interaction);
m_pValue.append(m_pValue_a);
m_pValue.append(m_pValue_b);
delete[] mean_a;
delete[] mean_b;
delete[] partitionNames_a;
delete[] partitionNames_b;
return;
}
/**************************************Levene Test****************************************/
// Some reference to local variables.
// np = number of partitions
// df = degrees of freedom
// totalRows = total number of rows in column
// these variables are taken from: https://en.wikipedia.org/wiki/Levene%27s_test
// yiBar = mean of ith group;
// Zij = |Yij - yiBar|
// ziBar = mean of Zij for group i
// ziBarBar = mean for all zij
// ni = number of elements in group i
// Performs Levene's test for equality of variances on exactly two selected columns.
// categoricalVariable: when false and m_columns[0] is numeric, every column is treated as one
// group; otherwise m_columns[0] provides the group labels and m_columns[1] the values.
// Fills m_statsTable with Ni / yiBar / ziBar per group, prints the hypotheses, F value,
// P value and degrees of freedom, and appends to m_pValue and m_statisticValue.
// NOTE(review): yiBar, ziBar, ni and colNames are allocated with new[] before several early
// returns below that do not free them, and no delete[] for colNames is visible at the end.
// NOTE(review): QMap, QMapIterator and static_cast appear without template arguments in this
// text; they look stripped - confirm against the repository.
void HypothesisTest::m_performLeveneTest(bool categoricalVariable) {
if (m_columns.size() != 2) {
printError("Inappropriate number of m_columns selected");
return;
}
// np: number of groups, n: number of data points
int np = 0;
int n = 0;
if (!categoricalVariable && isNumericOrInteger(m_columns[0]))
np = m_columns.size();
else
countPartitions(m_columns[0], np, n);
if (np < 2) {
- printError("Select atleast two m_columns / classes");
+ printError("Select at least two m_columns / classes");
return;
}
// per-group accumulators: first sums, later divided by ni to become means
double* yiBar = new double[np];
double* ziBar = new double[np];
double ziBarBar = 0;
double* ni = new double[np];
for (int i = 0; i < np; i++) {
yiBar[i] = 0;
ziBar[i] = 0;
ni[i] = 0;
}
double fValue;
int df = 0;
int totalRows = 0;
QString* colNames = new QString[np];
if (!categoricalVariable && isNumericOrInteger(m_columns[0])) {
// --- every selected column is one group ---
totalRows = m_columns[0]->rowCount();
double value = 0;
// sum and count the non-NaN values per column; the data ends at the first row
// in which all columns are NaN
for (int j = 0; j < totalRows; j++) {
int numberNaNCols = 0;
for (int i = 0; i < np; i++) {
value = m_columns[i]->valueAt(j);
if (std::isnan(value)) {
numberNaNCols++;
continue;
}
yiBar[i] += value;
ni[i]++;
n++;
}
if (numberNaNCols == np) {
totalRows = j;
break;
}
}
// group means
for (int i = 0; i < np; i++) {
if (ni[i] > 0)
yiBar[i] = yiBar[i] / ni[i];
else {
printError("One of the selected m_columns is empty "
"or have choosen Independent Var.1 wrongly");
return;
}
}
// sum of the absolute deviations from the group mean, per group
for (int j = 0; j < totalRows; j++) {
for (int i = 0; i < np; i++) {
value = m_columns[i]->valueAt(j);
if (!(std::isnan(value)))
ziBar[i] += fabs(value - yiBar[i]);
}
}
// ziBarBar: mean absolute deviation over all points, ziBar[i]: per group
for (int i = 0; i < np; i++) {
ziBarBar += ziBar[i];
if (ni[i] > 0)
ziBar[i] = ziBar[i] / ni[i];
}
ziBarBar = ziBarBar / n;
double numberatorValue = 0;
double denominatorValue = 0;
// denominator: sum over all points of (zij - ziBar[i])^2
for (int j = 0; j < totalRows; j++) {
for (int i = 0; i < np; i++) {
value = m_columns[i]->valueAt(j);
if (!(std::isnan(value))) {
double zij = fabs(value - yiBar[i]);
denominatorValue += gsl_pow_2( (zij - ziBar[i]));
}
}
}
// NOTE(review): per the GSL documentation gsl_fcmp() returns 0 when its arguments are
// approximately equal and a non-zero value otherwise, so the '+' variant of this condition
// is true when denominatorValue differs from 0 - the opposite of the '-' variant; confirm.
- if (denominatorValue <= 0) {
+ if (gsl_fcmp(denominatorValue, 0. ,1.e-16)) {
printError( i18n("Denominator value is %1", denominatorValue));
return;
}
// numerator: sum over the groups of ni * (ziBar[i] - ziBarBar)^2
for (int i = 0; i < np; i++) {
colNames[i] = m_columns[i]->name();
numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar));
}
// NOTE(review): n and np are int, so (n - np) / (np - 1) is an integer division - confirm
fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue);
} else {
// --- m_columns[0] holds the group labels, m_columns[1] the values ---
// class name -> 1-based index; the lookup below treats the value 0 as "not seen yet"
QMap classnameToIndex;
// the label column is switched to text mode for the computation and restored afterwards
AbstractColumn::ColumnMode originalColMode = m_columns[0]->columnMode();
m_columns[0]->setColumnMode(AbstractColumn::Text);
int partitionNumber = 1;
QString name;
double value;
int classIndex;
// sum and count the values per class; the data ends at the first NaN value
for (int j = 0; j < n; j++) {
name = m_columns[0]->textAt(j);
value = m_columns[1]->valueAt(j);
if (std::isnan(value)) {
n = j;
break;
}
if (classnameToIndex[name] == 0) {
classnameToIndex[name] = partitionNumber;
partitionNumber++;
}
classIndex = classnameToIndex[name]-1;
ni[classIndex]++;
yiBar[classIndex] += value;
}
// group means
for (int i = 0; i < np; i++) {
if (ni[i] > 0)
yiBar[i] = yiBar[i] / ni[i];
else {
printError("One of the selected m_columns is empty "
"or have choosen Independent Var.1 wrongly");
m_columns[0]->setColumnMode(originalColMode);
return;
}
}
// sum of the absolute deviations from the group mean, per group
for (int j = 0; j < n; j++) {
name = m_columns[0]->textAt(j);
value = m_columns[1]->valueAt(j);
classIndex = classnameToIndex[name] - 1;
ziBar[classIndex] += fabs(value - yiBar[classIndex]);
}
// ziBarBar: mean absolute deviation over all points, ziBar[i]: per group
for (int i = 0; i < np; i++) {
ziBarBar += ziBar[i];
ziBar[i] = ziBar[i] / ni[i];
}
ziBarBar = ziBarBar / n;
double numberatorValue = 0;
double denominatorValue = 0;
// denominator: sum over all points of (zij - ziBar of its group)^2
for (int j = 0; j < n; j++) {
name = m_columns[0]->textAt(j);
value = m_columns[1]->valueAt(j);
classIndex = classnameToIndex[name] - 1;
double zij = fabs(value - yiBar[classIndex]);
denominatorValue += gsl_pow_2( (zij - ziBar[classIndex]));
}
// numerator: sum over the groups of ni * (ziBar[i] - ziBarBar)^2
for (int i = 0; i < np; i++)
numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar));
// NOTE(review): same concern about the truthiness of gsl_fcmp() as in the branch above
- if (denominatorValue <= 0) {
+ if (gsl_fcmp(denominatorValue, 0., 1.e-16)) {
printError( "number of data points is less or than equal to number of categorical variables");
m_columns[0]->setColumnMode(originalColMode);
return;
}
// NOTE(review): integer division, as in the branch above
fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue);
// row labels: "<column name> <class name>", placed at (class index - 1)
QMapIterator i(classnameToIndex);
while (i.hasNext()) {
i.next();
colNames[i.value()-1] = m_columns[0]->name() + " " + i.key();
}
m_columns[0]->setColumnMode(originalColMode);
}
df = n - np;
// now making the stats table.
int rowCount = np+1;
int columnCount = 4;
QVariant* rowMajor = new QVariant[rowCount*columnCount];
// header data;
rowMajor[0] = "";
rowMajor[1] = "Ni";
rowMajor[2] = "yiBar";
rowMajor[3] = "ziBar";
// table data
for (int row_i = 1; row_i < rowCount; row_i++) {
rowMajor[row_i*columnCount] = colNames[row_i-1];
rowMajor[row_i*columnCount + 1] = ni[row_i-1];
rowMajor[row_i*columnCount + 2] = yiBar[row_i-1];
rowMajor[row_i*columnCount + 3] = ziBar[row_i-1];
}
m_statsTable = getHtmlTable(rowCount, columnCount, rowMajor);
delete[] rowMajor;
delete[] yiBar;
delete[] ziBar;
delete[] ni;
// the P value is appended to m_pValue; the lines below read m_pValue[0], i.e. the first entry
m_pValue.append(nsl_stats_fdist_p(fValue, static_cast(np-1), df));
printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue");
printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue");
printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue");
printLine(4, i18n("F Value is %1 ", round(fValue)), "green");
printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green");
printLine(6, i18n("Degree of Freedom is %1", df), "green");
if (m_pValue[0] <= m_significanceLevel) {
printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significanceLevel));
printLine(8, "Requirement for homogeneity is not met", "red");
} else {
printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
printLine(8, "Requirement for homogeneity is met", "green");
}
m_statisticValue.append(fValue);
return;
}
//TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol;
// TODO: check which of the following forms is correct for TestZ with TailTwo:
// m_pValue.append(2*gsl_cdf_tdist_P(value, df)) versus
// m_pValue.append(gsl_cdf_tdist_P(value, df) + gsl_cdf_tdist_P(-value, df));
/*!
 * \brief Computes the p-value of a t- or z-test statistic for the configured tail (m_tailType).
 *
 * The p-value is appended to m_pValue and the null and alternate hypotheses are printed
 * with printLine() on lines 0 and 1.
 *
 * \param test     test type; only TTest and ZTest append a p-value, Anova and NoneType append nothing
 * \param value    test statistic; negated in place for the positive tail
 * \param col1Name name of the first column, used in the hypothesis text
 * \param col2Name name of the second column, used in the hypothesis text
 * \param mean     subtracted from \p value before the Gaussian CDF is evaluated (ZTest only)
 * \param sp       passed as sigma to gsl_cdf_gaussian_P() (ZTest only)
 * \param df       degrees of freedom passed to gsl_cdf_tdist_P() (TTest only)
 * \return m_pValue[0], capped at 1
 *
 * NOTE(review): the return statement reads the first element of m_pValue, so the list is
 * presumably cleared by the caller before every test and must not be empty - confirm.
 */
double HypothesisTest::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) {
	switch (test) {
	case HypothesisTest::Test::Type::TTest: {
		switch (m_tailType) {
		case HypothesisTest::Test::Tail::Negative: {
			m_pValue.append(gsl_cdf_tdist_P(value, df));
			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue");
			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue");
			break;
		}
		case HypothesisTest::Test::Tail::Positive: {
			// the upper tail is evaluated as the lower tail of the negated statistic
			value *= -1;
			m_pValue.append(gsl_cdf_tdist_P(value, df));
			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue");
			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue");
			break;
		}
		case HypothesisTest::Test::Tail::Two: {
			m_pValue.append(2.*gsl_cdf_tdist_P(-fabs(value), df));
			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue");
			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue");
			break;
		}
		}
		break;
	}
	case HypothesisTest::Test::Type::ZTest: {
		switch (m_tailType) {
		case HypothesisTest::Test::Tail::Negative: {
			m_pValue.append(gsl_cdf_gaussian_P(value - mean, sp));
			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue");
			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue");
			break;
		}
		case HypothesisTest::Test::Tail::Positive: {
			value *= -1;
			// a z-test uses the normal distribution: evaluate the Gaussian CDF like the
			// negative tail does, instead of a t-distribution helper with sp as its
			// degrees of freedom
			m_pValue.append(gsl_cdf_gaussian_P(value - mean, sp));
			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue");
			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue");
			break;
		}
		case HypothesisTest::Test::Tail::Two: {
			// twice the lower tail of -|x|, as in the t-test branch, so that the stored
			// p-value cannot exceed 1 when the statistic is positive
			m_pValue.append(2.*gsl_cdf_gaussian_P(-fabs(value - mean), sp));
			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue");
			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue");
			break;
		}
		}
		break;
	}
	case HypothesisTest::Test::Type::Anova:
	case HypothesisTest::Test::Type::NoneType:
		break;
	}

	// never report a probability above 1
	if (m_pValue[0] > 1)
		return 1;
	return m_pValue[0];
}
// Virtual functions
/*!
 * Returns the view widget of this test. The HypothesisTestView is created on the first
 * call and stored in m_view and m_partView; later calls return the stored widget.
 */
QWidget* HypothesisTest::view() const {
	if (!m_partView) {
		// view() is const, but the view is constructed with a non-const pointer to this test
		m_view = new HypothesisTestView(const_cast<HypothesisTest*>(this));
		m_partView = m_view;
	}
	return m_partView;
}
diff --git a/src/backend/generalTest/HypothesisTest.h b/src/backend/generalTest/HypothesisTest.h
index ef00b8906..2723f899e 100644
--- a/src/backend/generalTest/HypothesisTest.h
+++ b/src/backend/generalTest/HypothesisTest.h
@@ -1,100 +1,91 @@
/***************************************************************************
File : HypothesisTest.h
Project : LabPlot
Description : Doing Hypothesis-Test on data provided
--------------------------------------------------------------------
Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, *
* Boston, MA 02110-1301 USA *
* *
***************************************************************************/
#ifndef HYPOTHESISTEST_H
#define HYPOTHESISTEST_H
-#include "backend/core/AbstractPart.h"
#include "GeneralTest.h"
-#include "backend/lib/macros.h"
-
-class HypothesisTestView;
-class Spreadsheet;
-class QString;
-class Column;
-class QVBoxLayout;
-class QLabel;
// Hypothesis tests (t-test, z-test, ANOVA, Levene's test) on the columns selected in the
// GeneralTest base class.
// NOTE(review): the QList declarations below carry no element type in this text; the
// template arguments look stripped - confirm against the repository.
class HypothesisTest : public GeneralTest {
Q_OBJECT
public:
explicit HypothesisTest(const QString& name);
~HypothesisTest() override;
// Describes which test to run; Type and SubType values are distinct bits.
struct Test {
enum Type {
NoneType = 0,
TTest = 1 << 0,
ZTest = 1 << 1,
Anova = 1 << 2
};
enum SubType {
NoneSubType = 0,
TwoSampleIndependent = 1 << 0,
TwoSamplePaired = 1 << 1,
OneSample = 1 << 2,
OneWay = 1 << 3,
TwoWay = 1 << 4
};
enum Tail {Positive, Negative, Two};
Type type = NoneType;
SubType subtype = NoneSubType;
// unlike type and subtype, tail has no default member initializer
Tail tail;
};
void setPopulationMean(QVariant populationMean);
void setSignificanceLevel(QVariant alpha);
void performTest(Test m_test, bool categoricalVariable = true, bool equalVariance = true);
void performLeveneTest(bool categoricalVariable);
// accessors returning references to the result lists
QList& statisticValue();
QList& pValue();
QWidget* view() const override;
private:
void performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable = false, bool equalVariance = true);
void performTwoSamplePairedTest(HypothesisTest::Test::Type test);
void performOneSampleTest(HypothesisTest::Test::Type test);
void performOneWayAnova();
void performTwoWayAnova();
void m_performLeveneTest(bool categoricalVariable);
// appends the p-value for the given statistic to m_pValue and prints the hypotheses
double getPValue(const HypothesisTest::Test::Type& test, double& value,
const QString& col1Name, const QString& col2name,
const double mean, const double sp, const int df);
double m_populationMean;
double m_significanceLevel;
HypothesisTest::Test::Tail m_tailType;
// results appended by the perform*() functions
QList m_pValue;
QList m_statisticValue;
};
#endif // HYPOTHESISTEST_H
diff --git a/src/kdefrontend/dockwidgets/CorrelationCoefficientDock.cpp b/src/kdefrontend/dockwidgets/CorrelationCoefficientDock.cpp
index 71d5ecad3..7d78d2ed8 100644
--- a/src/kdefrontend/dockwidgets/CorrelationCoefficientDock.cpp
+++ b/src/kdefrontend/dockwidgets/CorrelationCoefficientDock.cpp
@@ -1,514 +1,513 @@
/***************************************************************************
File : CorrelationCoefficientDock.cpp
Project : LabPlot
Description : widget for correlation test properties
--------------------------------------------------------------------
Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, *
* Boston, MA 02110-1301 USA *
* *
***************************************************************************/
#include "CorrelationCoefficientDock.h"
#include "backend/core/AspectTreeModel.h"
#include "backend/core/AbstractAspect.h"
#include "backend/core/Project.h"
#include "backend/spreadsheet/Spreadsheet.h"
#include "commonfrontend/widgets/TreeViewComboBox.h"
#include "kdefrontend/datasources/DatabaseManagerDialog.h"
#include "kdefrontend/datasources/DatabaseManagerWidget.h"
#include "kdefrontend/TemplateHandler.h"
#include
#include
#include
#include
#include
#include
#include
#include
/*!
\class CorrelationCoefficientDock
\brief Provides a dock (widget) for correlation testing:
\ingroup kdefrontend
*/
//TODO: Add tooltips in the docks for non-obvious widgets.
//TODO: Add support for databases as a data source, in addition to spreadsheets.
// Builds the dock: fills the data source and test combo boxes, hides the column widgets
// until a test is shown, wires the signals of the widgets to the slots of this class and
// finally selects the first test.
// NOTE(review): the static_cast expressions in the connect() calls below have no target
// type in this text; the template arguments look stripped - confirm against the repository.
CorrelationCoefficientDock::CorrelationCoefficientDock(QWidget* parent) : QWidget(parent) {
ui.setupUi(this);
ui.cbDataSourceType->addItem(i18n("Spreadsheet"));
ui.cbDataSourceType->addItem(i18n("Database"));
// the spreadsheet selector is created in code and placed into the grid layout of the form
cbSpreadsheet = new TreeViewComboBox;
ui.gridLayout->addWidget(cbSpreadsheet, 5, 4, 1, 3);
ui.bDatabaseManager->setIcon(QIcon::fromTheme("network-server-database"));
ui.bDatabaseManager->setToolTip(i18n("Manage connections"));
// NOTE(review): the file name is appended to the location without a path separator - confirm
m_configPath = QStandardPaths::standardLocations(QStandardPaths::AppDataLocation).constFirst() + "sql_connections";
// the item data of each entry is the corresponding CorrelationCoefficient::Test value
ui.cbTest->addItem( i18n("Pearson r"), CorrelationCoefficient::Test::Pearson);
ui.cbTest->addItem( i18n("Kendall"), CorrelationCoefficient::Test::Kendall);
ui.cbTest->addItem( i18n("Spearman"), CorrelationCoefficient::Test::Spearman);
// adding item to tests and testtype combo box;
// making all test blocks invisible at starting.
ui.lCategorical->hide();
ui.chbCategorical->hide();
ui.lCol1->hide();
ui.cbCol1->hide();
ui.lCol2->hide();
ui.cbCol2->hide();
ui.pbPerformTest->setEnabled(false);
ui.pbPerformTest->setIcon(QIcon::fromTheme("run-build"));
// readConnections();
connect(ui.cbDataSourceType, static_cast(&QComboBox::currentIndexChanged),
this, &CorrelationCoefficientDock::dataSourceTypeChanged);
connect(cbSpreadsheet, &TreeViewComboBox::currentModelIndexChanged, this, &CorrelationCoefficientDock::spreadsheetChanged);
// the connections below are disabled
// connect(ui.cbConnection, static_cast(&QComboBox::currentIndexChanged),
// this, &CorrelationCoefficientDock::connectionChanged);
// connect(ui.cbTable, static_cast(&QComboBox::currentIndexChanged),
// this, &CorrelationCoefficientDock::tableChanged);
// connect(ui.bDatabaseManager, &QPushButton::clicked, this, &CorrelationCoefficientDock::showDatabaseManager);
// connect(ui.bAddRow, &QPushButton::clicked, this, &CorrelationCoefficientDock::addRow);
// connect(ui.bRemoveRow, &QPushButton::clicked, this,&CorrelationCoefficientDock::removeRow);
// connect(ui.bAddColumn, &QPushButton::clicked, this, &CorrelationCoefficientDock::addColumn);
// connect(ui.bRemoveColumn, &QPushButton::clicked, this,&CorrelationCoefficientDock::removeColumn);
// connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &CorrelationCoefficientDock::doTTest);
// connect(ui.cbCol2, static_cast(&QComboBox::currentIndexChanged), this, &CorrelationCoefficientDock::doTTest);
// connect(ui.lwFields, &QListWidget::itemSelectionChanged, this, [=]() {
// bool enabled = !ui.lwFields->selectedItems().isEmpty();
// ui.bAddRow->setEnabled(enabled);
// ui.bAddColumn->setEnabled(enabled);
// });
// connect(ui.lwRows, &QListWidget::doubleClicked, this,&CorrelationCoefficientDock::removeRow);
// connect(ui.lwRows, &QListWidget::itemSelectionChanged, this, [=]() {
// ui.bRemoveRow->setEnabled(!ui.lwRows->selectedItems().isEmpty());
// });
// connect(ui.lwColumns, &QListWidget::doubleClicked, this,&CorrelationCoefficientDock::removeColumn);
// connect(ui.lwColumns, &QListWidget::itemSelectionChanged, this, [=]() {
// ui.bRemoveColumn->setEnabled(!ui.lwColumns->selectedItems().isEmpty());
// });
connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &CorrelationCoefficientDock::showCorrelationCoefficient);
connect(ui.chbCategorical, &QCheckBox::stateChanged, this, &CorrelationCoefficientDock::changeCbCol2Label);
connect(ui.pbPerformTest, &QPushButton::clicked, this, &CorrelationCoefficientDock::findCorrelationCoefficient);
connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &CorrelationCoefficientDock::col1IndexChanged);
// select the first test and emit the signal explicitly so that the connected slot runs
ui.cbTest->setCurrentIndex(0);
emit ui.cbTest->currentIndexChanged(0);
}
/*!
 * Binds the dock to \p CorrelationCoefficient and shows its properties in the widgets.
 * m_initializing is set for the duration of the call so that slots which check it
 * return early while the widgets are being filled.
 *
 * NOTE(review): a new AspectTreeModel is allocated on every call and no release of a
 * previously created model is visible here - confirm ownership.
 */
void CorrelationCoefficientDock::setCorrelationCoefficient(CorrelationCoefficient* CorrelationCoefficient) {
	m_initializing = true;
	m_correlationCoefficient = CorrelationCoefficient;

	m_aspectTreeModel = new AspectTreeModel(m_correlationCoefficient->project());

	// aspect types shown as top level entries in the spreadsheet combo box
	QList<AspectType> list{AspectType::Folder, AspectType::Workbook,
				AspectType::Spreadsheet, AspectType::LiveDataSource};
	cbSpreadsheet->setTopLevelClasses(list);

	// only these aspect types can be selected
	list = {AspectType::Spreadsheet, AspectType::LiveDataSource};
	m_aspectTreeModel->setSelectableAspects(list);

	cbSpreadsheet->setModel(m_aspectTreeModel);

	//show the properties
	ui.leName->setText(m_correlationCoefficient->name());
	ui.leComment->setText(m_correlationCoefficient->comment());
	ui.cbDataSourceType->setCurrentIndex(m_correlationCoefficient->dataSourceType());
	if (m_correlationCoefficient->dataSourceType() == CorrelationCoefficient::DataSourceType::DataSourceSpreadsheet)
		setModelIndexFromAspect(cbSpreadsheet, m_correlationCoefficient->dataSourceSpreadsheet());
	// else
	// ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(m_correlationCoefficient->dataSourceConnection()));

	setColumnsComboBoxModel(m_correlationCoefficient->dataSourceSpreadsheet());
	this->dataSourceTypeChanged(ui.cbDataSourceType->currentIndex());

	//setting rows and columns in combo box;
	//undo functions
	// connect(m_correlationCoefficient, SIGNAL(aspectDescriptionChanged(const AbstractAspect*)), this, SLOT(CorrelationCoefficientDescriptionChanged(const AbstractAspect*)));

	m_initializing = false;
}
void CorrelationCoefficientDock::showCorrelationCoefficient() {
if (ui.cbTest->count() == 0)
return;
m_test = CorrelationCoefficient::Test(ui.cbTest->currentData().toInt());
ui.lCol1->show();
ui.cbCol1->show();
ui.lCol2->show();
ui.cbCol2->show();
ui.lCategorical->setVisible(bool(m_test & CorrelationCoefficient::Test::Pearson));
ui.chbCategorical->setVisible(bool(m_test & CorrelationCoefficient::Test::Pearson));
setColumnsComboBoxView();
ui.pbPerformTest->setEnabled(nonEmptySelectedColumns());
}
void CorrelationCoefficientDock::findCorrelationCoefficient() {
QVector cols;
if (ui.cbCol1->count() == 0)
return;
cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong());
cols << reinterpret_cast(ui.cbCol2->currentData().toLongLong());
m_correlationCoefficient->setColumns(cols);
m_correlationCoefficient->performTest(m_test, ui.chbCategorical->isChecked());
}
/*!
 * Selects \p aspect in the combo box \p cb, using the index that m_aspectTreeModel reports
 * for it. When \p aspect is null, an invalid model index is set instead.
 */
void CorrelationCoefficientDock::setModelIndexFromAspect(TreeViewComboBox* cb, const AbstractAspect* aspect) {
	if (!aspect) {
		cb->setCurrentModelIndex(QModelIndex());
		return;
	}

	cb->setCurrentModelIndex(m_aspectTreeModel->modelIndexOfAspect(aspect));
}
////*************************************************************
////****** SLOTs for changes triggered in CorrelationCoefficientDock *******
////*************************************************************
//void CorrelationCoefficientDock::nameChanged() {
// if (m_initializing)
// return;
// m_correlationCoefficient->setName(ui.leName->text());
//}
//void CorrelationCoefficientDock::commentChanged() {
// if (m_initializing)
// return;
// m_correlationCoefficient->setComment(ui.leComment->text());
//}
/*!
 * Shows either the spreadsheet widgets or the database widgets, depending on the data
 * source type selected at \p index.
 */
void CorrelationCoefficientDock::dataSourceTypeChanged(int index) {
	//QDEBUG("in dataSourceTypeChanged");
	CorrelationCoefficient::DataSourceType type = static_cast<CorrelationCoefficient::DataSourceType>(index);
	bool showDatabase = (type == CorrelationCoefficient::DataSourceType::DataSourceDatabase);

	ui.lSpreadsheet->setVisible(!showDatabase);
	cbSpreadsheet->setVisible(!showDatabase);

	ui.lConnection->setVisible(showDatabase);
	ui.cbConnection->setVisible(showDatabase);
	ui.bDatabaseManager->setVisible(showDatabase);
	ui.lTable->setVisible(showDatabase);
	ui.cbTable->setVisible(showDatabase);

	if (m_initializing)
		return;

	// NOTE(review): this stores the comment and not the new data source type, which looks
	// like a copy-paste leftover - confirm which setter is intended
	m_correlationCoefficient->setComment(ui.leComment->text());
}
void CorrelationCoefficientDock::spreadsheetChanged(const QModelIndex& index) {
//QDEBUG("in spreadsheetChanged");
auto* aspect = static_cast(index.internalPointer());
Spreadsheet* spreadsheet = dynamic_cast(aspect);
setColumnsComboBoxModel(spreadsheet);
m_correlationCoefficient->setDataSourceSpreadsheet(spreadsheet);
}
/*!
 * Reacts to a new selection in cbCol1 by updating the label of the second column.
 * A negative \p index is ignored.
 */
void CorrelationCoefficientDock::col1IndexChanged(int index) {
	if (index >= 0)
		changeCbCol2Label();
}
//void CorrelationCoefficientDock::connectionChanged() {
// if (ui.cbConnection->currentIndex() == -1) {
// ui.lTable->hide();
// ui.cbTable->hide();
// return;
// }
// //clear the previously shown tables
// ui.cbTable->clear();
// ui.lTable->show();
// ui.cbTable->show();
// const QString& connection = ui.cbConnection->currentText();
// //connection name was changed, determine the current connections settings
// KConfig config(m_configPath, KConfig::SimpleConfig);
// KConfigGroup group = config.group(connection);
// //close and remove the previos connection, if available
// if (m_db.isOpen()) {
// m_db.close();
// QSqlDatabase::removeDatabase(m_db.driverName());
// }
// //open the selected connection
// //QDEBUG("CorrelationCoefficientDock: connecting to " + connection);
// const QString& driver = group.readEntry("Driver");
// m_db = QSqlDatabase::addDatabase(driver);
// const QString& dbName = group.readEntry("DatabaseName");
// if (DatabaseManagerWidget::isFileDB(driver)) {
// if (!QFile::exists(dbName)) {
// KMessageBox::error(this, i18n("Couldn't find the database file '%1'. Please check the connection settings.", dbName),
// appendRow i18n("Connection Failed"));
// return;
// } else
// m_db.setDatabaseName(dbName);
// } else if (DatabaseManagerWidget::isODBC(driver)) {
// if (group.readEntry("CustomConnectionEnabled", false))
// m_db.setDatabaseName(group.readEntry("CustomConnectionString"));
// else
// m_db.setDatabaseName(dbName);
// } else {
// m_db.setDatabaseName(dbName);
// m_db.setHostName( group.readEntry("HostName") );
// m_db.setPort( group.readEntry("Port", 0) );
// m_db.setUserName( group.readEntry("UserName") );
// m_db.setPassword( group.readEntry("Password") );
// }
// WAIT_CURSOR;
// if (!m_db.open()) {
// RESET_CURSOR;
// KMessageBox::error(this, i18n("Failed to connect to the database '%1'. Please check the connection settings.", ui.cbConnection->currentText()) +
// QLatin1String("\n\n") + m_db.lastError().databaseText(),
// i18n("Connection Failed"));
// return;
// }
// //show all available database tables
// if (m_db.tables().size()) {
// for (auto table : m_db.tables())
// ui.cbTable->addItem(QIcon::fromTheme("view-form-table"), table);
// ui.cbTable->setCurrentIndex(0);
// }
// RESET_CURSOR;
// if (m_initializing)
// return;
//// m_correlationCoefficient->setDataSourceConnection(connection);
//}
//void CorrelationCoefficientDock::tableChanged() {
// const QString& table = ui.cbTable->currentText();
// //show all attributes of the selected table
//// for (const auto* col : spreadsheet->children()) {
//// QListWidgetItem* item = new QListWidgetItem(col->icon(), col->name());
//// ui.lwFields->addItem(item);
//// }
// if (m_initializing)
// return;
//// m_correlationCoefficient->setDataSourceTable(table);
//}
////*************************************************************
////******** SLOTs for changes triggered in Spreadsheet *********
////*************************************************************
/*!
 * Synchronizes the name and comment fields with \c aspect.
 * Only reacts to the correlation coefficient currently shown in this dock.
 * At most one of the two fields is updated per call: the name if it differs,
 * otherwise the comment if it differs.
 */
void CorrelationCoefficientDock::CorrelationCoefficientDescriptionChanged(const AbstractAspect* aspect) {
	if (aspect == m_correlationCoefficient) {
		// m_initializing is raised while the widgets are updated programmatically
		m_initializing = true;
		if (ui.leName->text() != aspect->name())
			ui.leName->setText(aspect->name());
		else if (ui.leComment->text() != aspect->comment())
			ui.leComment->setText(aspect->comment());
		m_initializing = false;
	}
}
void CorrelationCoefficientDock::changeCbCol2Label() {
if (ui.cbCol1->count() == 0) return;
QString selected_text = ui.cbCol1->currentText();
Column* col1 = m_correlationCoefficient->dataSourceSpreadsheet()->column(selected_text);
if (bool(m_test & (CorrelationCoefficient::Test::Kendall | CorrelationCoefficient::Test::Spearman)) ||
(!ui.chbCategorical->isChecked() &&
(col1->columnMode() == AbstractColumn::Integer || col1->columnMode() == AbstractColumn::Numeric))) {
ui.lCol2->setText( i18n("Independent Var. 2"));
ui.chbCategorical->setChecked(false);
ui.chbCategorical->setEnabled(true);
} else {
ui.lCol2->setText( i18n("Dependent Var. 1"));
if (!ui.chbCategorical->isChecked())
ui.chbCategorical->setEnabled(false);
else
ui.chbCategorical->setEnabled(true);
ui.chbCategorical->setChecked(true);
}
}
////*************************************************************
////******************** SETTINGS *******************************
////*************************************************************
//void CorrelationCoefficientDock::load() {
//}
//void CorrelationCoefficientDock::loadConfigFromTemplate(KConfig& config) {
// Q_UNUSED(config);
//}
///*!
// loads saved matrix properties from \c config.
// */
//void CorrelationCoefficientDock::loadConfig(KConfig& config) {
// Q_UNUSED(config);
//}
///*!
// saves matrix properties to \c config.
// */
//void CorrelationCoefficientDock::saveConfigAsTemplate(KConfig& config) {
// Q_UNUSED(config);
//}
/*!
 * Sorts the columns of \c spreadsheet into the three internal lists:
 * integer/numeric columns, columns with exactly two distinct values and
 * columns with more than two distinct values. Non-numeric columns with
 * fewer than two distinct values are dropped.
 * Afterwards the combo boxes and the test view are refreshed.
 *
 * \param spreadsheet spreadsheet providing the columns; may be \c nullptr,
 *        in which case all three lists are left empty.
 */
void CorrelationCoefficientDock::setColumnsComboBoxModel(Spreadsheet* spreadsheet) {
	m_onlyValuesCols.clear();
	m_twoCategoricalCols.clear();
	m_multiCategoricalCols.clear();

	// spreadsheetChanged() passes the unchecked result of a dynamic_cast, which can be null
	if (spreadsheet) {
		for (auto* col : spreadsheet->children<Column>()) {
			const auto mode = col->columnMode();
			if (mode == AbstractColumn::Integer || mode == AbstractColumn::Numeric) {
				m_onlyValuesCols.append(col);
				continue;
			}

			// non-numeric column: classify by the number of distinct values
			int np = 0, n_rows = 0;
			countPartitions(col, np, n_rows);
			if (np == 2)
				m_twoCategoricalCols.append(col);
			else if (np > 2)
				m_multiCategoricalCols.append(col);
		}
	}

	setColumnsComboBoxView();
	showCorrelationCoefficient();
}
//TODO: change from if else to switch case:
/*!
 * Refills both column combo boxes from the internal column lists according to the selected test.
 * Every item stores the address of its column as qint64 in the item data.
 *
 * - Pearson and Spearman: integer/numeric columns go into both combo boxes,
 *   columns with two distinct values into the first combo box only.
 * - Kendall: the columns of all three lists go into both combo boxes.
 */
void CorrelationCoefficientDock::setColumnsComboBoxView() {
	ui.cbCol1->clear();
	ui.cbCol2->clear();

	switch (m_test) {
	case CorrelationCoefficient::Test::Pearson:
	case CorrelationCoefficient::Test::Spearman:
		for (auto* col : m_onlyValuesCols) {
			ui.cbCol1->addItem(col->name(), qint64(col));
			ui.cbCol2->addItem(col->name(), qint64(col));
		}
		for (auto* col : m_twoCategoricalCols)
			ui.cbCol1->addItem(col->name(), qint64(col));
		break;
	case CorrelationCoefficient::Test::Kendall:
		for (auto* col : m_onlyValuesCols) {
			ui.cbCol1->addItem(col->name(), qint64(col));
			ui.cbCol2->addItem(col->name(), qint64(col));
		}
		for (auto* col : m_twoCategoricalCols) {
			ui.cbCol1->addItem(col->name(), qint64(col));
			ui.cbCol2->addItem(col->name(), qint64(col));
		}
		for (auto* col : m_multiCategoricalCols) {
			ui.cbCol1->addItem(col->name(), qint64(col));
			ui.cbCol2->addItem(col->name(), qint64(col));
		}
		break;
	}
}
bool CorrelationCoefficientDock::nonEmptySelectedColumns() {
- if (ui.cbCol1->isVisible() && ui.cbCol1->count() < 1)
- return false;
- if (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1)
+ if ((ui.cbCol1->isVisible() && ui.cbCol1->count() < 1) ||
+ (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1))
return false;
return true;
}
/*!
 * Counts the distinct text values in \c column.
 * The scan stops at the first empty cell.
 *
 * The column is switched to text mode for the scan and restored to its
 * original mode afterwards.
 *
 * \param column column to inspect.
 * \param np receives the number of distinct values found.
 * \param total_rows receives the number of rows scanned: the index of the
 *        first empty cell, or the row count if there is none.
 */
void CorrelationCoefficientDock::countPartitions(Column *column, int &np, int &total_rows) {
	total_rows = column->rowCount();
	np = 0;

	QMap<QString, bool> seenValues;
	const AbstractColumn::ColumnMode savedMode = column->columnMode();
	column->setColumnMode(AbstractColumn::Text);

	for (int row = 0; row < total_rows; ++row) {
		const QString value = column->textAt(row);
		if (value.isEmpty()) {
			// the first empty cell ends the data
			total_rows = row;
			break;
		}

		if (!seenValues.contains(value)) {
			seenValues.insert(value, true);
			++np;
		}
	}

	column->setColumnMode(savedMode);
}
diff --git a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp b/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp
index 2804cbbc9..ceae0f807 100644
--- a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp
+++ b/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp
@@ -1,877 +1,875 @@
/***************************************************************************
File : HypothesisTestDock.cpp
Project : LabPlot
Description : widget for hypothesis test properties
--------------------------------------------------------------------
Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, *
* Boston, MA 02110-1301 USA *
* *
***************************************************************************/
#include "HypothesisTestDock.h"
#include "backend/core/AspectTreeModel.h"
#include "backend/core/AbstractAspect.h"
#include "backend/core/Project.h"
#include "backend/spreadsheet/Spreadsheet.h"
#include "commonfrontend/widgets/TreeViewComboBox.h"
#include "kdefrontend/datasources/DatabaseManagerDialog.h"
#include "kdefrontend/datasources/DatabaseManagerWidget.h"
#include "kdefrontend/TemplateHandler.h"
#include
#include
#include
#include
#include
#include
#include
#include
/*!
\class HypothesisTestDock
\brief Provides a dock (widget) for hypothesis testing:
\ingroup kdefrontend
*/
//TODO: To add tooltips in docks for non obvious widgets.
//TODO: Add functionality for database along with spreadsheet.
/*!
 * Creates the dock: sets up the UI, fills the data source and test combo boxes,
 * hides all test specific widgets, labels the radio buttons for the null and the
 * alternative hypothesis and connects the widget signals to the slots of the dock.
 * \param parent parent widget, passed on to QWidget.
 */
HypothesisTestDock::HypothesisTestDock(QWidget* parent) : QWidget(parent) {
//QDEBUG("in hypothesis test constructor ");
ui.setupUi(this);
ui.cbDataSourceType->addItem(i18n("Spreadsheet"));
ui.cbDataSourceType->addItem(i18n("Database"));
// the spreadsheet selector is created in code and placed into the grid layout of the UI
cbSpreadsheet = new TreeViewComboBox;
ui.gridLayout->addWidget(cbSpreadsheet, 5, 4, 1, 3);
ui.bDatabaseManager->setIcon(QIcon::fromTheme("network-server-database"));
ui.bDatabaseManager->setToolTip(i18n("Manage connections"));
// NOTE(review): no path separator is inserted between the directory and "sql_connections" - confirm this matches the location used by the database manager
m_configPath = QStandardPaths::standardLocations(QStandardPaths::AppDataLocation).constFirst() + "sql_connections";
// adding item to tests and testtype combo box;
ui.cbTest->addItem( i18n("T Test"), HypothesisTest::Test::Type::TTest);
ui.cbTest->addItem( i18n("Z Test"), HypothesisTest::Test::Type::ZTest);
ui.cbTest->addItem( i18n("ANOVA"), HypothesisTest::Test::Type::Anova);
ui.lPopulationSigma->setText( UTF8_QSTRING("σ"));
// making all test blocks invisible at starting.
ui.pbLeveneTest->hide();
ui.lCategorical->hide();
ui.chbCategorical->hide();
ui.lCol1->hide();
ui.cbCol1->hide();
ui.lCol2->hide();
ui.cbCol2->hide();
ui.lCol3->hide();
ui.cbCol3->hide();
ui.lEqualVariance->hide();
ui.chbEqualVariance->hide();
ui.chbEqualVariance->setChecked(true);
ui.lPopulationSigma->hide();
ui.lPopulationSigma->setToolTip( i18n("Sigma of Population "
"Hint: Z-Test if preffered over T-Test if this is known"));
ui.chbPopulationSigma->hide();
ui.lePopulationSigma->hide();
ui.pbPerformTest->setEnabled(false);
ui.rbH1OneTail2->hide();
ui.rbH1OneTail1->hide();
ui.rbH1TwoTail->hide();
ui.rbH0OneTail1->hide();
ui.rbH0OneTail2->hide();
ui.rbH0TwoTail->hide();
ui.lH0->hide();
ui.lH1->hide();
QString mu = UTF8_QSTRING("μ");
QString mu0 = UTF8_QSTRING("μₒ");
// radio button for null and alternate hypothesis
// for alternative hypothesis (h1)
// one_tail_1 is mu > mu0; one_tail_2 is mu < mu0; two_tail = mu != mu0;
ui.rbH1OneTail1->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING(">"), mu0));
ui.rbH1OneTail2->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("<"), mu0));
ui.rbH1TwoTail->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("≠"), mu0));
ui.rbH0OneTail1->setText( i18n("%1 %2 %3",mu, UTF8_QSTRING("≤"), mu0));
ui.rbH0OneTail2->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("≥"), mu0));
ui.rbH0TwoTail->setText( i18n("%1 %2 %3", mu, UTF8_QSTRING("="), mu0));
// the H0 buttons are disabled here; the onRbH1...Toggled() slots check the H0 button matching the selected H1 button
ui.rbH0TwoTail->setEnabled(false);
ui.rbH0OneTail1->setEnabled(false);
ui.rbH0OneTail2->setEnabled(false);
// setting muo and alpha buttons
ui.lMuo->setText( i18n("%1", mu0));
ui.lAlpha->setText( i18n("%1", UTF8_QSTRING("α")));
ui.leMuo->setText( i18n("%1", m_populationMean));
ui.leAlpha->setText( i18n("%1", m_significanceLevel));
ui.lMuo->hide();
ui.lMuo->setToolTip( i18n("Population Mean"));
ui.lAlpha->hide();
ui.lAlpha->setToolTip( i18n("Significance Level"));
ui.leMuo->hide();
ui.leAlpha->hide();
ui.pbPerformTest->setIcon(QIcon::fromTheme("run-build"));
// NOTE(review): the next two calls repeat the setText() calls made a few lines above
ui.leMuo->setText( i18n("%1", m_populationMean));
ui.leAlpha->setText( i18n("%1", m_significanceLevel));
// readConnections();
// auto* style = ui.bAddRow->style();
// ui.bAddRow->setIcon(style->standardIcon(QStyle::SP_ArrowRight));
// ui.bAddRow->setToolTip(i18n("Add the selected field to rows"));
// ui.bRemoveRow->setIcon(style->standardIcon(QStyle::SP_ArrowLeft));
// ui.bRemoveRow->setToolTip(i18n("Remove the selected field from rows"));
// ui.bAddColumn->setIcon(style->standardIcon(QStyle::SP_ArrowRight));
// ui.bAddColumn->setToolTip(i18n("Add the selected field to columns"));
// ui.bRemoveColumn->setIcon(style->standardIcon(QStyle::SP_ArrowLeft));
// ui.bRemoveColumn->setToolTip(i18n("Remove the selected field from columns"));
// //add/remove buttons only enabled if something was selected
// ui.bAddRow->setEnabled(false);
// ui.bRemoveRow->setEnabled(false);
// ui.bAddColumn->setEnabled(false);
// ui.bRemoveColumn->setEnabled(false);
// connect(ui.leName, &QLineEdit::textChanged, this, &HypothesisTestDock::nameChanged);
// connect(ui.leComment, &QLineEdit::textChanged, this, &HypothesisTestDock::commentChanged);
// data source selection
connect(ui.cbDataSourceType, static_cast(&QComboBox::currentIndexChanged),
this, &HypothesisTestDock::dataSourceTypeChanged);
connect(cbSpreadsheet, &TreeViewComboBox::currentModelIndexChanged, this, &HypothesisTestDock::spreadsheetChanged);
// connect(ui.cbConnection, static_cast(&QComboBox::currentIndexChanged),
// this, &HypothesisTestDock::connectionChanged);
// connect(ui.cbTable, static_cast(&QComboBox::currentIndexChanged),
// this, &HypothesisTestDock::tableChanged);
// connect(ui.bDatabaseManager, &QPushButton::clicked, this, &HypothesisTestDock::showDatabaseManager);
// connect(ui.bAddRow, &QPushButton::clicked, this, &HypothesisTestDock::addRow);
// connect(ui.bRemoveRow, &QPushButton::clicked, this,&HypothesisTestDock::removeRow);
// connect(ui.bAddColumn, &QPushButton::clicked, this, &HypothesisTestDock::addColumn);
// connect(ui.bRemoveColumn, &QPushButton::clicked, this,&HypothesisTestDock::removeColumn);
// connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::doTTest);
// connect(ui.cbCol2, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::doTTest);
// connect(ui.lwFields, &QListWidget::itemSelectionChanged, this, [=]() {
// bool enabled = !ui.lwFields->selectedItems().isEmpty();
// ui.bAddRow->setEnabled(enabled);
// ui.bAddColumn->setEnabled(enabled);
// });
// connect(ui.lwRows, &QListWidget::doubleClicked, this,&HypothesisTestDock::removeRow);
// connect(ui.lwRows, &QListWidget::itemSelectionChanged, this, [=]() {
// ui.bRemoveRow->setEnabled(!ui.lwRows->selectedItems().isEmpty());
// });
// connect(ui.lwColumns, &QListWidget::doubleClicked, this,&HypothesisTestDock::removeColumn);
// connect(ui.lwColumns, &QListWidget::itemSelectionChanged, this, [=]() {
// ui.bRemoveColumn->setEnabled(!ui.lwColumns->selectedItems().isEmpty());
// });
// selecting a test refills the list of test types, selecting a test type updates the visible widgets
connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showTestType);
connect(ui.cbTestType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest);
// connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest);
// connect(ui.cbTestType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest);
connect(ui.pbPerformTest, &QPushButton::clicked, this, &HypothesisTestDock::doHypothesisTest);
connect(ui.pbLeveneTest, &QPushButton::clicked, this, &HypothesisTestDock::performLeveneTest);
//connecting null hypothesis and alternate hypothesis radio button
connect(ui.rbH1OneTail1, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1OneTail1Toggled);
connect(ui.rbH1OneTail2, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1OneTail2Toggled);
connect(ui.rbH1TwoTail, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1TwoTailToggled);
connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::col1IndexChanged);
connect(ui.chbCategorical, &QCheckBox::stateChanged, this, &HypothesisTestDock::changeCbCol2Label);
connect(ui.chbPopulationSigma, &QCheckBox::stateChanged, this, &HypothesisTestDock::chbPopulationSigmaStateChanged);
// select the first test and test type and emit the signals explicitly so that
// showTestType() and showHypothesisTest() run once for the initial selection
ui.cbTest->setCurrentIndex(0);
emit ui.cbTest->currentIndexChanged(0);
ui.cbTestType->setCurrentIndex(0);
emit ui.cbTestType->currentIndexChanged(0);
}
/*!
 * Sets the hypothesis test shown in this dock: creates the aspect tree model for
 * the spreadsheet combo box from the project of the test and fills the widgets
 * (name, comment, data source type, spreadsheet, columns) from the test.
 */
// NOTE(review): the parameter carries the same name as the class HypothesisTest - consider renaming it
void HypothesisTestDock::setHypothesisTest(HypothesisTest* HypothesisTest) {
//QDEBUG("in set hypothesis test");
// while set, dataSourceTypeChanged() returns before modifying the test
m_initializing = true;
m_hypothesisTest = HypothesisTest;
// NOTE(review): a new model is allocated on every call and no deletion of the previous one is visible here - confirm ownership
m_aspectTreeModel = new AspectTreeModel(m_hypothesisTest->project());
QList list{AspectType::Folder, AspectType::Workbook,
AspectType::Spreadsheet, AspectType::LiveDataSource};
cbSpreadsheet->setTopLevelClasses(list);
// only spreadsheets and live data sources are set as selectable aspects of the model
list = {AspectType::Spreadsheet, AspectType::LiveDataSource};
m_aspectTreeModel->setSelectableAspects(list);
cbSpreadsheet->setModel(m_aspectTreeModel);
//show the properties
ui.leName->setText(m_hypothesisTest->name());
ui.leComment->setText(m_hypothesisTest->comment());
ui.cbDataSourceType->setCurrentIndex(m_hypothesisTest->dataSourceType());
if (m_hypothesisTest->dataSourceType() == HypothesisTest::DataSourceType::DataSourceSpreadsheet)
setModelIndexFromAspect(cbSpreadsheet, m_hypothesisTest->dataSourceSpreadsheet());
// else
// ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(m_hypothesisTest->dataSourceConnection()));
// NOTE(review): dataSourceSpreadsheet() is passed on without a null check - confirm that setColumnsComboBoxModel() copes with nullptr
setColumnsComboBoxModel(m_hypothesisTest->dataSourceSpreadsheet());
this->dataSourceTypeChanged(ui.cbDataSourceType->currentIndex());
//setting rows and columns in combo box;
//undo functions
// connect(m_hypothesisTest, SIGNAL(aspectDescriptionChanged(const AbstractAspect*)), this, SLOT(hypothesisTestDescriptionChanged(const AbstractAspect*)));
m_initializing = false;
}
void HypothesisTestDock::showTestType() {
//QDEBUG("in show test type");
m_test.type = HypothesisTest::Test::Type(ui.cbTest->currentData().toInt());
ui.cbTestType->clear();
if (m_test.type & (HypothesisTest::Test::Type::TTest | HypothesisTest::Test::Type::ZTest)) {
ui.cbTestType->addItem( i18n("Two Sample Independent"), HypothesisTest::Test::SubType::TwoSampleIndependent);
ui.cbTestType->addItem( i18n("Two Sample Paired"), HypothesisTest::Test::SubType::TwoSamplePaired);
ui.cbTestType->addItem( i18n("One Sample"), HypothesisTest::Test::SubType::OneSample);
} else if (m_test.type & HypothesisTest::Test::Type::Anova) {
ui.cbTestType->addItem( i18n("One Way"), HypothesisTest::Test::SubType::OneWay);
ui.cbTestType->addItem( i18n("Two Way"), HypothesisTest::Test::SubType::TwoWay);
}
}
/*!
 * Stores the test type selected in the test type combo box and shows or hides
 * the widgets according to the selected test and test type. Afterwards the
 * column combo boxes are refilled and the "perform test" and "Levene test"
 * buttons are enabled depending on nonEmptySelectedColumns().
 *
 * Does nothing while the test type combo box is empty.
 */
void HypothesisTestDock::showHypothesisTest() {
	//QDEBUG("in showHypothesisTest");
	if (ui.cbTestType->count() == 0)
		return;

	m_test.subtype = HypothesisTest::Test::SubType(ui.cbTestType->currentData().toInt());

	// visibility conditions, each evaluated once and shared by a label and its input widget
	const bool hasSecondColumn = bool(m_test.subtype & (~HypothesisTest::Test::SubType::OneSample));
	const bool twoWayAnova = bool(m_test.type & (HypothesisTest::Test::Anova) &
								setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoWay));
	// NOTE(review): a Type mask and a SubType mask are combined with a bitwise '&' here, which is only
	// non-zero if both enumerators share a bit - left unchanged, confirm against the enum values
	const bool independentTTest = bool( (m_test.type & HypothesisTest::Test::Type::TTest) &
								(m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent));
	const bool sigmaOption = bool((m_test.type & (HypothesisTest::Test::Type::TTest |
								HypothesisTest::Test::Type::ZTest)) &
								~(setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneSample)));
	// the second operand used to test the constant TTest instead of the selected test type,
	// which made the button visible for any test with the "two sample independent" sub type
	const bool leveneAvailable = bool((m_test.type & HypothesisTest::Test::Type::Anova &
								setAllBits(m_test.subtype & HypothesisTest::Test::SubType::OneWay)) |
								(m_test.type & HypothesisTest::Test::Type::TTest &
								setAllBits(m_test.subtype & HypothesisTest::Test::SubType::TwoSampleIndependent)));
	const bool hasHypotheses = bool(m_test.type & ~HypothesisTest::Test::Type::Anova);
	const bool oneSample = bool(m_test.subtype & HypothesisTest::Test::SubType::OneSample);

	ui.lCol1->show();
	ui.cbCol1->show();

	ui.lCol2->setVisible(hasSecondColumn);
	ui.cbCol2->setVisible(hasSecondColumn);

	ui.lCol3->setVisible(twoWayAnova);
	ui.cbCol3->setVisible(twoWayAnova);

	ui.lEqualVariance->setVisible(independentTTest);
	ui.chbEqualVariance->setVisible(independentTTest);
	ui.lCategorical->setVisible(independentTTest);
	ui.chbCategorical->setVisible(independentTTest);
	ui.chbEqualVariance->setChecked(true);

	ui.lPopulationSigma->setVisible(sigmaOption);
	ui.chbPopulationSigma->setVisible(sigmaOption);
	ui.chbPopulationSigma->setChecked(false);

	ui.pbLeveneTest->setVisible(leveneAvailable);

	ui.lH1->setVisible(hasHypotheses);
	ui.rbH1OneTail1->setVisible(hasHypotheses);
	ui.rbH1OneTail2->setVisible(hasHypotheses);
	ui.rbH1TwoTail->setVisible(hasHypotheses);
	ui.lH0->setVisible(hasHypotheses);
	ui.rbH0OneTail1->setVisible(hasHypotheses);
	ui.rbH0OneTail2->setVisible(hasHypotheses);
	ui.rbH0TwoTail->setVisible(hasHypotheses);
	ui.rbH1TwoTail->setChecked(true);

	// use the condition itself for the line edit: QWidget::isVisible() of the label is false as long as
	// the dock is not shown on screen, which would hide the line edit regardless of the selection
	ui.lMuo->setVisible(oneSample);
	ui.leMuo->setVisible(oneSample);

	ui.lAlpha->show();
	ui.leAlpha->show();

	setColumnsComboBoxView();

	const bool columnsAvailable = nonEmptySelectedColumns();
	ui.pbPerformTest->setEnabled(columnsAvailable);
	ui.pbLeveneTest->setEnabled(columnsAvailable);
}
void HypothesisTestDock::doHypothesisTest() {
//QDEBUG("in doHypothesisTest");
m_hypothesisTest->setPopulationMean(ui.leMuo->text());
m_hypothesisTest->setSignificanceLevel(ui.leAlpha->text());
QVector cols;
if (ui.cbCol1->count() == 0)
return;
cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong());
if (m_test.subtype & HypothesisTest::Test::SubType::TwoWay)
cols << reinterpret_cast(ui.cbCol3->currentData().toLongLong());
if (m_test.subtype & (~HypothesisTest::Test::SubType::OneSample))
if (ui.cbCol2->count() > 0)
cols << reinterpret_cast(ui.cbCol2->currentData().toLongLong());
m_hypothesisTest->setColumns(cols);
m_hypothesisTest->performTest(m_test, ui.chbCategorical->isChecked(), ui.chbEqualVariance->isChecked());
}
void HypothesisTestDock::performLeveneTest() {
QVector cols;
if (ui.cbCol1->count() == 0 || ui.cbCol2->count() == 0)
return;
cols << reinterpret_cast(ui.cbCol1->currentData().toLongLong());
cols << reinterpret_cast(ui.cbCol2->currentData().toLongLong());
m_hypothesisTest->setColumns(cols);
m_hypothesisTest->setSignificanceLevel(ui.leAlpha->text());
m_hypothesisTest->performLeveneTest(ui.chbCategorical->isChecked());
}
/*!
 * Selects \c aspect in the combo box \c cb.
 * For a null \c aspect an invalid model index is set instead.
 */
void HypothesisTestDock::setModelIndexFromAspect(TreeViewComboBox* cb, const AbstractAspect* aspect) {
	cb->setCurrentModelIndex(aspect ? m_aspectTreeModel->modelIndexOfAspect(aspect) : QModelIndex());
}
///*!
// shows the database manager where the connections are created and edited.
// The selected connection is selected in the connection combo box in this widget.
//**/
//void HypothesisTestDock::showDatabaseManager() {
// DatabaseManagerDialog* dlg = new DatabaseManagerDialog(this, ui.cbConnection->currentText());
// if (dlg->exec() == QDialog::Accepted) {
// //re-read the available connections to be in sync with the changes in DatabaseManager
// m_initializing = true;
// ui.cbConnection->clear();
// readConnections();
// //select the connection the user has selected in DatabaseManager
// const QString& conn = dlg->connection();
// ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(conn));
// m_initializing = false;
// connectionChanged();
// }
// delete dlg;
//}
///*!
// loads all available saved connections
//*/
//void HypothesisTestDock::readConnections() {
// DEBUG("ImportSQLDatabaseWidget: reading available connections");
// KConfig config(m_configPath, KConfig::SimpleConfig);
// for (const auto& name : config.groupList())
// ui.cbConnection->addItem(name);
//}
///*!
// * adds the selected field to the rows
// */
//void HypothesisTestDock::addRow() {
// QString field = ui.lwFields->currentItem()->text();
// ui.lwRows->addItem(field);
// ui.lwFields->takeItem(ui.lwFields->currentRow());
// m_hypothesisTest->addToRows(field);
//}
///*!
// * removes the selected field from the rows
// */
//void HypothesisTestDock::removeRow() {
// const QString& field = ui.lwRows->currentItem()->text();
// ui.lwRows->takeItem(ui.lwRows->currentRow());
// m_hypothesisTest->removeFromRows(field);
// updateFields();
//}
///*!
// * adds the selected field to the columns
// */
//void HypothesisTestDock::addColumn() {
// QString field = ui.lwFields->currentItem()->text();
// ui.lwColumns->addItem(field);
// ui.lwFields->takeItem(ui.lwFields->currentRow());
// m_hypothesisTest->addToColumns(field);
//}
///*!
// * removes the selected field from the columns
// */
//void HypothesisTestDock::removeColumn() {
// const QString& field = ui.lwColumns->currentItem()->text();
// ui.lwColumns->takeItem(ui.lwColumns->currentRow());
// m_hypothesisTest->removeFromColumns(field);
// updateFields();
//}
///*!
// * re-populates the content of the "Fields" list widget by adding the non-selected fields only.
// * called when a selected field is removed from rows or columns.
// */
//void HypothesisTestDock::updateFields() {
// ui.lwFields->clear();
// for (auto dimension : m_hypothesisTest->dimensions())
// if (!fieldSelected(dimension))
// ui.lwFields->addItem(new QListWidgetItem(QIcon::fromTheme("draw-text"), dimension));
// for (auto measure : m_hypothesisTest->measures())
// if (!fieldSelected(measure))
// ui.lwFields->addItem(new QListWidgetItem(measure));
//}
///*!
// * return \c true if the field name \c field was selected among rows or columns,
// * return \c false otherwise.
// * */
//bool HypothesisTestDock::fieldSelected(const QString& field) {
// for (int i = 0; i < ui.lwRows->count(); ++i)
// if (ui.lwRows->item(i)->text() == field)
// return true;
// for (int i = 0; i < ui.lwColumns->count(); ++i)
// if (ui.lwColumns->item(i)->text() == field)
// return true;
// return false;
//}
////*************************************************************
////****** SLOTs for changes triggered in HypothesisTestDock *******
////*************************************************************
//void HypothesisTestDock::nameChanged() {
// if (m_initializing)
// return;
// m_hypothesisTest->setName(ui.leName->text());
//}
//void HypothesisTestDock::commentChanged() {
// if (m_initializing)
// return;
// m_hypothesisTest->setComment(ui.leComment->text());
//}
void HypothesisTestDock::dataSourceTypeChanged(int index) {
//QDEBUG("in dataSourceTypeChanged");
HypothesisTest::DataSourceType type = static_cast(index);
bool showDatabase = (type == HypothesisTest::DataSourceType::DataSourceDatabase);
ui.lSpreadsheet->setVisible(!showDatabase);
cbSpreadsheet->setVisible(!showDatabase);
ui.lConnection->setVisible(showDatabase);
ui.cbConnection->setVisible(showDatabase);
ui.bDatabaseManager->setVisible(showDatabase);
ui.lTable->setVisible(showDatabase);
ui.cbTable->setVisible(showDatabase);
if (m_initializing)
return;
m_hypothesisTest->setComment(ui.leComment->text());
}
void HypothesisTestDock::spreadsheetChanged(const QModelIndex& index) {
//QDEBUG("in spreadsheetChanged");
auto* aspect = static_cast(index.internalPointer());
Spreadsheet* spreadsheet = dynamic_cast(aspect);
setColumnsComboBoxModel(spreadsheet);
m_hypothesisTest->setDataSourceSpreadsheet(spreadsheet);
}
/*!
 * Updates the label of the second column and the state of the "categorical"
 * check box depending on the selected test and on the mode of the column
 * selected in the first combo box.
 *
 * Does nothing if the first combo box is empty, or if the selected column is
 * needed for the decision but cannot be resolved (no data source spreadsheet
 * or no column with the selected name).
 */
void HypothesisTestDock::changeCbCol2Label() {
	//QDEBUG("in changeCbCol2Label");
	// for these test/sub type combinations the label is fixed and the check box is left untouched
	if ( (m_test.type & ~HypothesisTest::Test::Type::Anova) & (m_test.subtype & ~HypothesisTest::Test::SubType::TwoSampleIndependent)) {
		ui.lCol2->setText( i18n("Independent Var. 2"));
		return;
	}

	if (ui.cbCol1->count() == 0)
		return;

	// the column mode only matters while the categorical check box is unchecked
	bool numericCol1 = false;
	if (!ui.chbCategorical->isChecked()) {
		auto* spreadsheet = m_hypothesisTest->dataSourceSpreadsheet();
		Column* col1 = spreadsheet ? spreadsheet->column(ui.cbCol1->currentText()) : nullptr;
		if (!col1)
			return;	// nothing to inspect, avoid dereferencing a null pointer
		numericCol1 = (col1->columnMode() == AbstractColumn::Integer || col1->columnMode() == AbstractColumn::Numeric);
	}

	if (numericCol1) {
		ui.lCol2->setText( i18n("Independent Var. 2"));
		ui.chbCategorical->setChecked(false);
		ui.chbCategorical->setEnabled(true);
	} else {
		ui.lCol2->setText( i18n("Dependent Var. 1"));
		// keep this order: the enabled state is derived from the check state before it is forced to "checked"
		ui.chbCategorical->setEnabled(ui.chbCategorical->isChecked());
		ui.chbCategorical->setChecked(true);
	}
}
/*!
 * Shows the line edit for the population sigma only while the corresponding
 * check box is both visible and checked, hides it otherwise.
 */
void HypothesisTestDock::chbPopulationSigmaStateChanged() {
	const bool sigmaRequested = ui.chbPopulationSigma->isVisible() && ui.chbPopulationSigma->isChecked();
	ui.lePopulationSigma->setVisible(sigmaRequested);
}
/*!
 * Slot for index changes of the first column combo box.
 * Refreshes the label of the second column; a negative \c index is ignored.
 */
void HypothesisTestDock::col1IndexChanged(int index) {
	if (index >= 0)
		changeCbCol2Label();
}
//void HypothesisTestDock::connectionChanged() {
// if (ui.cbConnection->currentIndex() == -1) {
// ui.lTable->hide();
// ui.cbTable->hide();
// return;
// }
// //clear the previously shown tables
// ui.cbTable->clear();
// ui.lTable->show();
// ui.cbTable->show();
// const QString& connection = ui.cbConnection->currentText();
// //connection name was changed, determine the current connections settings
// KConfig config(m_configPath, KConfig::SimpleConfig);
// KConfigGroup group = config.group(connection);
// //close and remove the previous connection, if available
// if (m_db.isOpen()) {
// m_db.close();
// QSqlDatabase::removeDatabase(m_db.driverName());
// }
// //open the selected connection
// //QDEBUG("HypothesisTestDock: connecting to " + connection);
// const QString& driver = group.readEntry("Driver");
// m_db = QSqlDatabase::addDatabase(driver);
// const QString& dbName = group.readEntry("DatabaseName");
// if (DatabaseManagerWidget::isFileDB(driver)) {
// if (!QFile::exists(dbName)) {
// KMessageBox::error(this, i18n("Couldn't find the database file '%1'. Please check the connection settings.", dbName),
// appendRow i18n("Connection Failed"));
// return;
// } else
// m_db.setDatabaseName(dbName);
// } else if (DatabaseManagerWidget::isODBC(driver)) {
// if (group.readEntry("CustomConnectionEnabled", false))
// m_db.setDatabaseName(group.readEntry("CustomConnectionString"));
// else
// m_db.setDatabaseName(dbName);
// } else {
// m_db.setDatabaseName(dbName);
// m_db.setHostName( group.readEntry("HostName") );
// m_db.setPort( group.readEntry("Port", 0) );
// m_db.setUserName( group.readEntry("UserName") );
// m_db.setPassword( group.readEntry("Password") );
// }
// WAIT_CURSOR;
// if (!m_db.open()) {
// RESET_CURSOR;
// KMessageBox::error(this, i18n("Failed to connect to the database '%1'. Please check the connection settings.", ui.cbConnection->currentText()) +
// QLatin1String("\n\n") + m_db.lastError().databaseText(),
// i18n("Connection Failed"));
// return;
// }
// //show all available database tables
// if (m_db.tables().size()) {
// for (auto table : m_db.tables())
// ui.cbTable->addItem(QIcon::fromTheme("view-form-table"), table);
// ui.cbTable->setCurrentIndex(0);
// }
// RESET_CURSOR;
// if (m_initializing)
// return;
//// m_hypothesisTest->setDataSourceConnection(connection);
//}
//void HypothesisTestDock::tableChanged() {
// const QString& table = ui.cbTable->currentText();
// //show all attributes of the selected table
//// for (const auto* col : spreadsheet->children()) {
//// QListWidgetItem* item = new QListWidgetItem(col->icon(), col->name());
//// ui.lwFields->addItem(item);
//// }
// if (m_initializing)
// return;
//// m_hypothesisTest->setDataSourceTable(table);
//}
////*************************************************************
////******** SLOTs for changes triggered in Spreadsheet *********
////*************************************************************
/*!
 * Synchronizes the name and comment fields with \c aspect.
 * Only reacts to the hypothesis test currently shown in this dock.
 * At most one of the two fields is updated per call: the name if it differs,
 * otherwise the comment if it differs.
 */
void HypothesisTestDock::hypothesisTestDescriptionChanged(const AbstractAspect* aspect) {
	//QDEBUG("in hypothesisTestDescriptionChanged");
	if (aspect == m_hypothesisTest) {
		// m_initializing is raised while the widgets are updated programmatically
		m_initializing = true;
		if (ui.leName->text() != aspect->name())
			ui.leName->setText(aspect->name());
		else if (ui.leComment->text() != aspect->comment())
			ui.leComment->setText(aspect->comment());
		m_initializing = false;
	}
}
////*************************************************************
////******************** SETTINGS *******************************
////*************************************************************
//void HypothesisTestDock::load() {
//}
//void HypothesisTestDock::loadConfigFromTemplate(KConfig& config) {
// Q_UNUSED(config);
//}
///*!
// loads saved matrix properties from \c config.
// */
//void HypothesisTestDock::loadConfig(KConfig& config) {
// Q_UNUSED(config);
//}
///*!
// saves matrix properties to \c config.
// */
//void HypothesisTestDock::saveConfigAsTemplate(KConfig& config) {
// Q_UNUSED(config);
//}
//TODO: use custom slots that react to the checked state instead of the built-in clicked slots
// for the alternate hypothesis
// one_tail_1 is mu > mu0; one_tail_2 is mu < mu0; two_tail is mu != mu0
/*!
 * Handles toggling of the first one-tailed alternate-hypothesis radio button.
 * When it becomes checked, the matching null-hypothesis button is checked as well
 * and the test tail is set to \c Positive. Unchecking is ignored.
 */
void HypothesisTestDock::onRbH1OneTail1Toggled(bool checked) {
	if (checked) {
		ui.rbH0OneTail1->setChecked(true);
		m_test.tail = HypothesisTest::Test::Tail::Positive;
	}
}
/*!
 * Handles toggling of the second one-tailed alternate-hypothesis radio button.
 * When it becomes checked, the matching null-hypothesis button is checked as well
 * and the test tail is set to \c Negative. Unchecking is ignored.
 */
void HypothesisTestDock::onRbH1OneTail2Toggled(bool checked) {
	if (checked) {
		ui.rbH0OneTail2->setChecked(true);
		m_test.tail = HypothesisTest::Test::Tail::Negative;
	}
}
/*!
 * Handles toggling of the two-tailed alternate-hypothesis radio button.
 * When it becomes checked, the matching null-hypothesis button is checked as well
 * and the test tail is set to \c Two. Unchecking is ignored.
 */
void HypothesisTestDock::onRbH1TwoTailToggled(bool checked) {
	if (checked) {
		ui.rbH0TwoTail->setChecked(true);
		m_test.tail = HypothesisTest::Test::Tail::Two;
	}
}
/**************************************Helper Functions********************************************/
/*!
 * Counts the distinct non-empty cell texts ("partitions") of \c column.
 *
 * The column is temporarily switched to text mode so that every cell can be read
 * via textAt(); the original column mode is restored before returning.
 * Scanning stops at the first empty cell.
 *
 * \param column      column to inspect
 * \param np          out: number of distinct texts found before the first empty cell
 * \param total_rows  out: rowCount() of the column, or the index of the first empty
 *                    cell if one is encountered
 */
void HypothesisTestDock::countPartitions(Column *column, int &np, int &total_rows) {
	total_rows = column->rowCount();
	np = 0;

	// only the keys matter: the map records which cell texts were already seen
	QMap<QString, bool> discovered_categorical_var;

	const AbstractColumn::ColumnMode original_col_mode = column->columnMode();
	column->setColumnMode(AbstractColumn::Text);

	for (int i = 0; i < total_rows; i++) {
		const QString cell_value = column->textAt(i);

		// the first empty cell marks the end of the data
		if (cell_value.isEmpty()) {
			total_rows = i;
			break;
		}

		if (discovered_categorical_var.contains(cell_value))
			continue;

		discovered_categorical_var.insert(cell_value, true);
		np++;
	}

	column->setColumnMode(original_col_mode);
}
/*!
 * Rebuilds the three column lists used to populate the column combo boxes
 * from the columns of \c spreadsheet and refreshes the view afterwards.
 *
 * - Integer and Numeric columns go to \c m_onlyValuesCols.
 * - Every other column is classified by its number of partitions (see countPartitions()):
 *   exactly two -> \c m_twoCategoricalCols, more than two -> \c m_multiCategoricalCols,
 *   fewer than two -> ignored.
 */
void HypothesisTestDock::setColumnsComboBoxModel(Spreadsheet* spreadsheet) {
	m_onlyValuesCols.clear();
	m_twoCategoricalCols.clear();
	m_multiCategoricalCols.clear();

	for (auto* col : spreadsheet->children<Column>()) {
		const auto mode = col->columnMode();
		if (mode == AbstractColumn::Integer || mode == AbstractColumn::Numeric) {
			m_onlyValuesCols.append(col);
			continue;
		}

		// non-numeric column: classify by the number of distinct values
		int np = 0, n_rows = 0;
		countPartitions(col, np, n_rows);

		if (np == 2)
			m_twoCategoricalCols.append(col);
		else if (np > 2)
			m_multiCategoricalCols.append(col);
		// np <= 1: not usable as a categorical variable, skip
	}

	setColumnsComboBoxView();
	showHypothesisTest();
}
//TODO: change from if else to switch case:
/*!
 * Clears the three column combo boxes and refills them according to the
 * currently selected test type and subtype.
 *
 * Each item shows the column name and carries the column pointer (as qint64) as item data.
 *
 * Z-test / T-test:
 * - TwoSampleIndependent: value columns -> cbCol1 and cbCol2, two-category columns -> cbCol1
 * - TwoSamplePaired:      value columns -> cbCol1 and cbCol2
 * - OneSample:            value columns -> cbCol1
 *
 * Anova:
 * - OneWay: value columns -> cbCol2, two- and multi-category columns -> cbCol1
 * - TwoWay: value columns -> cbCol2, two- and multi-category columns -> cbCol1 and cbCol3
 *
 * All other combinations leave the combo boxes empty.
 */
void HypothesisTestDock::setColumnsComboBoxView() {
	ui.cbCol1->clear();
	ui.cbCol2->clear();
	ui.cbCol3->clear();

	switch (m_test.type) {
	case HypothesisTest::Test::Type::ZTest:
	case HypothesisTest::Test::Type::TTest: {
		switch (m_test.subtype) {
		case HypothesisTest::Test::SubType::TwoSampleIndependent: {
			for (auto* col : m_onlyValuesCols) {
				ui.cbCol1->addItem(col->name(), qint64(col));
				ui.cbCol2->addItem(col->name(), qint64(col));
			}
			for (auto* col : m_twoCategoricalCols)
				ui.cbCol1->addItem(col->name(), qint64(col));
			break;
		}
		case HypothesisTest::Test::SubType::TwoSamplePaired: {
			for (auto* col : m_onlyValuesCols) {
				ui.cbCol1->addItem(col->name(), qint64(col));
				ui.cbCol2->addItem(col->name(), qint64(col));
			}
			break;
		}
		case HypothesisTest::Test::SubType::OneSample: {
			for (auto* col : m_onlyValuesCols)
				ui.cbCol1->addItem(col->name(), qint64(col));
			break;
		}
		// subtypes that do not apply to Z-/T-tests: nothing to show
		case HypothesisTest::Test::SubType::OneWay:
		case HypothesisTest::Test::SubType::TwoWay:
		case HypothesisTest::Test::SubType::NoneSubType:
			break;
		}
		break;
	}
	case HypothesisTest::Test::Type::Anova: {
		switch (m_test.subtype) {
		case HypothesisTest::Test::SubType::OneWay: {
			for (auto* col : m_onlyValuesCols)
				ui.cbCol2->addItem(col->name(), qint64(col));
			for (auto* col : m_twoCategoricalCols)
				ui.cbCol1->addItem(col->name(), qint64(col));
			for (auto* col : m_multiCategoricalCols)
				ui.cbCol1->addItem(col->name(), qint64(col));
			break;
		}
		case HypothesisTest::Test::SubType::TwoWay: {
			for (auto* col : m_onlyValuesCols)
				ui.cbCol2->addItem(col->name(), qint64(col));
			for (auto* col : m_twoCategoricalCols) {
				ui.cbCol1->addItem(col->name(), qint64(col));
				ui.cbCol3->addItem(col->name(), qint64(col));
			}
			for (auto* col : m_multiCategoricalCols) {
				ui.cbCol1->addItem(col->name(), qint64(col));
				ui.cbCol3->addItem(col->name(), qint64(col));
			}
			break;
		}
		// subtypes that do not apply to Anova: nothing to show
		case HypothesisTest::Test::SubType::TwoSampleIndependent:
		case HypothesisTest::Test::SubType::TwoSamplePaired:
		case HypothesisTest::Test::SubType::OneSample:
		case HypothesisTest::Test::SubType::NoneSubType:
			break;
		}
		break;
	}
	case HypothesisTest::Test::Type::NoneType:
		break;
	}
}
bool HypothesisTestDock::nonEmptySelectedColumns() {
- if (ui.cbCol1->isVisible() && ui.cbCol1->count() < 1)
- return false;
- if (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1)
- return false;
- if (ui.cbCol3->isVisible() && ui.cbCol3->count() < 1)
+ if ((ui.cbCol1->isVisible() && ui.cbCol1->count() < 1) ||
+ (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1) ||
+ (ui.cbCol3->isVisible() && ui.cbCol3->count() < 1))
return false;
return true;
}
/*!
 * Returns 0 for a zero input. Otherwise clears the lowest set bit of \c bits and
 * returns the bitwise complement of that value, truncated to 8 bits.
 * For an input with exactly one bit set this yields 0xFF.
 */
uint8_t HypothesisTestDock::setAllBits(const uint8_t& bits) {
	if (bits == 0)
		return 0;

	// bits & (bits - 1) drops the lowest set bit
	const uint8_t withoutLowestBit = static_cast<uint8_t>(bits & (bits - 1));
	return static_cast<uint8_t>(~withoutLowestBit);
}
|