diff --git a/src/backend/generalTest/CorrelationCoefficient.cpp b/src/backend/generalTest/CorrelationCoefficient.cpp
index 4a20ba843..f1f609936 100644
--- a/src/backend/generalTest/CorrelationCoefficient.cpp
+++ b/src/backend/generalTest/CorrelationCoefficient.cpp
@@ -1,423 +1,424 @@
/***************************************************************************
File : CorrelationCoefficient.cpp
Project : LabPlot
Description : Finding Correlation Coefficient on data provided
--------------------------------------------------------------------
Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, *
* Boston, MA 02110-1301 USA *
* *
***************************************************************************/
#include "CorrelationCoefficient.h"
#include "GeneralTest.h"
#include "kdefrontend/generalTest/CorrelationCoefficientView.h"
#include "backend/spreadsheet/Spreadsheet.h"
#include "backend/core/column/Column.h"
#include "backend/lib/macros.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
extern "C" {
#include "backend/nsl/nsl_stats.h"
}
CorrelationCoefficient::CorrelationCoefficient(const QString& name) : GeneralTest(name, AspectType::CorrelationCoefficient) {
}
CorrelationCoefficient::~CorrelationCoefficient() {
}
void CorrelationCoefficient::performTest(int test, bool categoricalVariable, bool calculateStats) {
m_statsTable = "";
m_correlationValue = 0;
m_statisticValue.clear();
m_pValue.clear();
m_inputStatsTableModel->clear();
for (int i = 0; i < RESULTLINESCOUNT; i++)
m_resultLine[i]->clear();
switch (testType(test)) {
case CorrelationCoefficient::Pearson: {
m_currTestName = "" + i18n("Pearson's r Correlation Test") + "
";
performPearson(categoricalVariable);
break;
}
case CorrelationCoefficient::Kendall:
m_currTestName = "" + i18n("Kendall's Rank Correlation Test") + "
";
performKendall();
break;
case CorrelationCoefficient::Spearman: {
m_currTestName = "" + i18n("Spearman Correlation Coefficient Test") + "
";
performSpearman();
break;
}
case CorrelationCoefficient::ChiSquare:
switch (testSubtype(test)) {
case CorrelationCoefficient::IndependenceTest:
performChiSquareIndpendence(calculateStats);
break;
}
break;
}
emit changed();
}
double CorrelationCoefficient::correlationValue() const {
return m_correlationValue;
}
QList CorrelationCoefficient::statisticValue() const {
return m_statisticValue;
}
QList CorrelationCoefficient::pValue() const {
return m_pValue;
}
/***************************************************************************************************************************
* Private Implementations
* ************************************************************************************************************************/
/*********************************************Pearson r ******************************************************************/
//Formulaes are taken from https://www.statisticssolutions.com/correlation-pearson-kendall-spearman/
// variables:
// N = total number of observations
// sumColx = sum of values in colx
// sumSqColx = sum of square of values in colx
// sumColxColy = sum of product of values in colx and coly
//TODO: support for col1 is categorical.
//TODO: add tooltip for correlation value result
//TODO: find p value
void CorrelationCoefficient::performPearson(bool categoricalVariable) {
if (m_columns.count() != 2) {
printError("Select only 2 columns ");
return;
}
if (categoricalVariable) {
printLine(1, "currently categorical variable not supported", "blue");
return;
}
QString col1Name = m_columns[0]->name();
QString col2Name = m_columns[1]->name();
if (!m_columns[1]->isNumeric())
printError("Column " + col2Name + " should contain only numeric or interger values");
int N = findCount(m_columns[0]);
if (N != findCount(m_columns[1])) {
printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal");
return;
}
double sumCol1 = findSum(m_columns[0], N);
double sumCol2 = findSum(m_columns[1], N);
double sumSqCol1 = findSumSq(m_columns[0], N);
double sumSqCol2 = findSumSq(m_columns[1], N);
double sumCol12 = 0;
for (int i = 0; i < N; i++)
sumCol12 += m_columns[0]->valueAt(i) *
m_columns[1]->valueAt(i);
// printing table;
// HtmlCell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader;
QList rowMajor;
int level = 0;
// horizontal header
QString sigma = UTF8_QSTRING("Σ");
rowMajor.append(new HtmlCell("", level, true));
rowMajor.append(new HtmlCell("N", level, true, "Total Number of Observations"));
rowMajor.append(new HtmlCell(QString(sigma + "Scores"), level, true, "Sum of Scores in each column"));
rowMajor.append(new HtmlCell(QString(sigma + "Scores2"), level, true, "Sum of Squares of scores in each column"));
rowMajor.append(new HtmlCell(QString(sigma + "(" + UTF8_QSTRING("∏") + "Scores)"), level, true, "Sum of product of scores of both columns"));
//data with vertical header.
level++;
rowMajor.append(new HtmlCell(col1Name, level, true));
rowMajor.append(new HtmlCell(N, level));
rowMajor.append(new HtmlCell(sumCol1, level));
rowMajor.append(new HtmlCell(sumSqCol1, level));
rowMajor.append(new HtmlCell(sumCol12, level, false, "", 2, 1));
level++;
rowMajor.append(new HtmlCell(col2Name, level, true));
rowMajor.append(new HtmlCell(N, level));
rowMajor.append(new HtmlCell(sumCol2, level));
rowMajor.append(new HtmlCell(sumSqCol2, level));
m_statsTable += getHtmlTable3(rowMajor);
m_correlationValue = (N * sumCol12 - sumCol1*sumCol2) /
sqrt((N * sumSqCol1 - gsl_pow_2(sumCol1)) *
(N * sumSqCol2 - gsl_pow_2(sumCol2)));
printLine(0, QString("Correlation Value is %1").arg(round(m_correlationValue)), "green");
}
/***********************************************Kendall ******************************************************************/
// used knight algorithm for fast performance O(nlogn) rather than O(n^2)
// http://adereth.github.io/blog/2013/10/30/efficiently-computing-kendalls-tau/
// TODO: Change date format type to original for numeric type;
// TODO: add tooltips.
// TODO: Compute tauB for ties.
// TODO: find P Value from Z Value
void CorrelationCoefficient::performKendall() {
if (m_columns.count() != 2) {
printError("Select only 2 columns ");
return;
}
QString col1Name = m_columns[0]->name();
QString col2Name = m_columns[1]->name();
int N = findCount(m_columns[0]);
if (N != findCount(m_columns[1])) {
printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal");
return;
}
QVector col2Ranks(N);
if (m_columns[0]->isNumeric()) {
if (m_columns[0]->isNumeric() && m_columns[1]->isNumeric()) {
for (int i = 0; i < N; i++)
col2Ranks[int(m_columns[0]->valueAt(i)) - 1] = int(m_columns[1]->valueAt(i));
} else {
printError(QString("Ranking System should be same for both Column: %1 and Column: %2
"
"Hint: Check for data types of columns").arg(col1Name).arg(col2Name));
return;
}
} else {
AbstractColumn::ColumnMode origCol1Mode = m_columns[0]->columnMode();
AbstractColumn::ColumnMode origCol2Mode = m_columns[1]->columnMode();
m_columns[0]->setColumnMode(AbstractColumn::Text);
m_columns[1]->setColumnMode(AbstractColumn::Text);
QMap ValueToRank;
for (int i = 0; i < N; i++) {
if (ValueToRank[m_columns[0]->textAt(i)] != 0) {
printError("Currently ties are not supported");
m_columns[0]->setColumnMode(origCol1Mode);
m_columns[1]->setColumnMode(origCol2Mode);
return;
}
ValueToRank[m_columns[0]->textAt(i)] = i + 1;
}
for (int i = 0; i < N; i++)
col2Ranks[i] = ValueToRank[m_columns[1]->textAt(i)];
m_columns[0]->setColumnMode(origCol1Mode);
m_columns[1]->setColumnMode(origCol2Mode);
}
int nPossiblePairs = (N * (N - 1)) / 2;
int nDiscordant = findDiscordants(col2Ranks.data(), 0, N - 1);
int nCorcordant = nPossiblePairs - nDiscordant;
m_correlationValue = double(nCorcordant - nDiscordant) / nPossiblePairs;
m_statisticValue.append((3 * (nCorcordant - nDiscordant)) /
sqrt(N * (N- 1) * (2 * N + 5) / 2));
printLine(0, QString("Number of Discordants are %1").arg(nDiscordant), "green");
printLine(1, QString("Number of Concordant are %1").arg(nCorcordant), "green");
printLine(2, QString("Tau a is %1").arg(round(m_correlationValue)), "green");
printLine(3, QString("Z Value is %1").arg(round(m_statisticValue[0])), "green");
return;
}
/***********************************************Spearman ******************************************************************/
// All formulaes and symbols are taken from : https://www.statisticshowto.datasciencecentral.com/spearman-rank-correlation-definition-calculate/
void CorrelationCoefficient::performSpearman() {
if (m_columns.count() != 2) {
printError("Select only 2 columns ");
return;
}
QString col1Name = m_columns[0]->name();
QString col2Name = m_columns[1]->name();
int N = findCount(m_columns[0]);
if (N != findCount(m_columns[1])) {
printError("Number of data values in Column: " + col1Name + "and Column: " + col2Name + "are not equal");
return;
}
QMap col1Ranks;
convertToRanks(m_columns[0], N, col1Ranks);
QMap col2Ranks;
convertToRanks(m_columns[1], N, col2Ranks);
double ranksCol1Mean = 0;
double ranksCol2Mean = 0;
for (int i = 0; i < N; i++) {
ranksCol1Mean += col1Ranks[int(m_columns[0]->valueAt(i))];
ranksCol2Mean += col2Ranks[int(m_columns[1]->valueAt(i))];
}
ranksCol1Mean /= N;
ranksCol2Mean /= N;
double s12 = 0;
double s1 = 0;
double s2 = 0;
for (int i = 0; i < N; i++) {
double centeredRank_1 = col1Ranks[int(m_columns[0]->valueAt(i))] - ranksCol1Mean;
double centeredRank_2 = col2Ranks[int(m_columns[1]->valueAt(i))] - ranksCol2Mean;
s12 += centeredRank_1 * centeredRank_2;
s1 += gsl_pow_2(centeredRank_1);
s2 += gsl_pow_2(centeredRank_2);
}
s12 /= N;
s1 /= N;
s2 /= N;
m_correlationValue = s12 / std::sqrt(s1 * s2);
printLine(0, QString("Spearman Rank Correlation value is %1").arg(m_correlationValue), "green");
}
/***********************************************Chi Square Test for Indpendence******************************************************************/
+// TODO: Implement this function
void CorrelationCoefficient::performChiSquareIndpendence(bool calculateStats) {
Q_UNUSED(calculateStats);
}
/***********************************************Helper Functions******************************************************************/
int CorrelationCoefficient::findDiscordants(int *ranks, int start, int end) {
if (start >= end)
return 0;
int mid = (start + end) / 2;
int leftDiscordants = findDiscordants(ranks, start, mid);
int rightDiscordants = findDiscordants(ranks, mid + 1, end);
int len = end - start + 1;
int leftLen = mid - start + 1;
int rightLen = end - mid;
int leftLenRemain = leftLen;
QVector leftRanks(leftLen);
QVector rightRanks(rightLen);
for (int i = 0; i < leftLen; i++)
leftRanks[i] = ranks[start + i];
for (int i = leftLen; i < leftLen + rightLen; i++)
rightRanks[i - leftLen] = ranks[start + i];
int mergeDiscordants = 0;
int i = 0, j = 0, k =0;
while (i < len) {
if (j >= leftLen) {
ranks[start + i] = rightRanks[k];
k++;
} else if (k >= rightLen) {
ranks[start + i] = leftRanks[j];
j++;
} else if (leftRanks[j] < rightRanks[k]) {
ranks[start + i] = leftRanks[j];
j++;
leftLenRemain--;
} else if (leftRanks[j] > rightRanks[k]) {
ranks[start + i] = rightRanks[k];
mergeDiscordants += leftLenRemain;
k++;
}
i++;
}
return leftDiscordants + rightDiscordants + mergeDiscordants;
}
void CorrelationCoefficient::convertToRanks(const Column* col, int N, QMap &ranks) {
if (col->isNumeric())
return;
double* sortedList = new double[N];
for (int i = 0; i < N; i++)
sortedList[i] = col->valueAt(i);
std::sort(sortedList, sortedList + N, std::greater());
ranks.clear();
for (int i = 0; i < N; i++)
ranks[sortedList[i]] = i + 1;
delete[] sortedList;
}
void CorrelationCoefficient::convertToRanks(const Column* col, QMap &ranks) {
convertToRanks(col, findCount(col), ranks);
}
/***********************************************Virtual Functions******************************************************************/
QWidget* CorrelationCoefficient::view() const {
if (!m_partView) {
m_view = new CorrelationCoefficientView(const_cast(this));
m_partView = m_view;
}
return m_partView;
}
diff --git a/src/backend/generalTest/HypothesisTest.cpp b/src/backend/generalTest/HypothesisTest.cpp
index 872afbf67..9f858c094 100644
--- a/src/backend/generalTest/HypothesisTest.cpp
+++ b/src/backend/generalTest/HypothesisTest.cpp
@@ -1,1167 +1,1159 @@
/***************************************************************************
File : HypothesisTest.cpp
Project : LabPlot
Description : Doing Hypothesis-Test on data provided
--------------------------------------------------------------------
Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, *
* Boston, MA 02110-1301 USA *
* *
***************************************************************************/
#include "HypothesisTest.h"
#include "kdefrontend/generalTest/HypothesisTestView.h"
#include "backend/spreadsheet/Spreadsheet.h"
#include "backend/core/column/Column.h"
#include "backend/lib/macros.h"
#include
#include
#include
#include
#include
#include
#include
#include
extern "C" {
#include "backend/nsl/nsl_stats.h"
}
HypothesisTest::HypothesisTest(const QString &name) : GeneralTest(name, AspectType::HypothesisTest) {
}
HypothesisTest::~HypothesisTest() {
}
void HypothesisTest::setPopulationMean(QVariant populationMean) {
m_populationMean = populationMean.toDouble();
}
void HypothesisTest::setSignificanceLevel(QVariant alpha) {
m_significanceLevel = alpha.toDouble();
}
void HypothesisTest::setTail(HypothesisTailType tail) {
m_tail = tail;
}
void HypothesisTest::performTest(int test, bool categoricalVariable, bool equalVariance, bool calculateStats) {
m_pValue.clear();
m_statisticValue.clear();
m_statsTable = "";
for (int i = 0; i < RESULTLINESCOUNT; i++) {
m_resultLine[i]->clear();
m_resultLine[i]->setToolTip(QString());
}
if (calculateStats)
m_inputStatsTableModel->clear();
switch (testSubtype(test)) {
case TwoSampleIndependent: {
m_currTestName = "" + i18n("Two Sample Independent Test") + "
";
performTwoSampleIndependentTest(testType(test), categoricalVariable, equalVariance, calculateStats);
break;
}
case TwoSamplePaired:
m_currTestName = "" + i18n("Two Sample Paired Test") + "
";
performTwoSamplePairedTest(testType(test));
break;
case OneSample: {
m_currTestName = "" + i18n("One Sample Test") + "
";
performOneSampleTest(testType(test));
break;
}
case OneWay: {
m_currTestName = "" + i18n("One Way Anova") + "
";
performOneWayAnova();
break;
}
case TwoWay: {
m_currTestName = "" + i18n("Two Way Anova") + "
";
performTwoWayAnova();
break;
}
}
emit changed();
}
void HypothesisTest::performLeveneTest(bool categoricalVariable) {
m_pValue.clear();
m_statisticValue.clear();
m_statsTable = "";
for (int i = 0; i < RESULTLINESCOUNT; i++) {
m_resultLine[i]->clear();
m_resultLine[i]->setToolTip(QString());
}
m_currTestName = "" + i18n("Levene Test for Equality of Variance") + "
";
m_performLeveneTest(categoricalVariable);
emit changed();
}
void HypothesisTest::initInputStatsTable(int test, bool calculateStats) {
m_inputStatsTableModel->clear();
if (!calculateStats) {
- switch (testSubtype(test)) {
- case TwoSampleIndependent: {
+ if (testSubtype(test) == TwoSampleIndependent) {
m_inputStatsTableModel->setRowCount(2);
m_inputStatsTableModel->setColumnCount(4);
m_inputStatsTableModel->setHorizontalHeaderLabels(
{i18n("N"), i18n("Sum"), i18n("Mean"), i18n("Standard Deviation")});
- break;
- }
- case TwoSamplePaired:
- case OneSample:
- case OneWay:
- case TwoWay:
- break;
}
}
emit changed();
}
QList& HypothesisTest::statisticValue() {
return m_statisticValue;
}
QList& HypothesisTest::pValue() {
return m_pValue;
}
/******************************************************************************
* Private Implementations
* ****************************************************************************/
//TODO: backend of z test;
//TODO: use https://www.gnu.org/software/gsl/doc/html/statistics.html for basic statistic calculations
/**************************Two Sample Independent *************************************/
// program is crashing on some input.
void HypothesisTest::performTwoSampleIndependentTest(int test, bool categoricalVariable, bool equalVariance, bool calculateStats) {
int n[2];
double sum[2], mean[2], std[2];
QString col1Name;
QString col2Name;
if (!calculateStats) {
QString textValue;
QDEBUG("m_inputStatsTable row and column count " << m_inputStatsTableModel->rowCount() << m_inputStatsTableModel->columnCount());
for (int i = 0; i < 2; i++) {
n[i] = m_inputStatsTableModel->data(m_inputStatsTableModel->index(i, 0)).toInt();
sum[i] = m_inputStatsTableModel->data(m_inputStatsTableModel->index(i, 1)).toDouble();
mean[i] = m_inputStatsTableModel->data(m_inputStatsTableModel->index(i, 2)).toDouble();
std[i] = m_inputStatsTableModel->data(m_inputStatsTableModel->index(i, 3)).toDouble();
if (sum[i] == 0.0)
sum[i] = mean[i] * n[i];
if (mean[i] == 0.0 && n[i] > 0)
mean[i] = sum[i] / n[i];
col1Name = "1";
col2Name = "2";
}
} else {
if (m_columns.size() != 2) {
printError("Inappropriate number of m_columns selected");
return;
}
col1Name = m_columns[0]->name();
col2Name = m_columns[1]->name();
if (!categoricalVariable && m_columns[0]->isNumeric()) {
for (int i = 0; i < 2; i++) {
findStats(m_columns[i], n[i], sum[i], mean[i], std[i]);
if (n[i] == 0) {
printError("At least two values should be there in every column");
return;
}
if (std[i] == 0.0) {
printError(i18n("Standard Deviation of at least one column is equal to 0: last column is: %1", m_columns[i]->name()));
return;
}
}
} else {
QMap colName;
QString baseColName;
int np;
int totalRows;
countPartitions(m_columns[0], np, totalRows);
if (np != 2) {
printError( i18n("Number of Categorical Variable in Column %1 is not equal to 2", m_columns[0]->name()));
return;
}
if (m_columns[0]->isNumeric())
baseColName = m_columns[0]->name();
GeneralErrorType errorCode = findStatsCategorical(m_columns[0], m_columns[1], n, sum, mean, std, colName, np, totalRows);
switch (errorCode) {
case ErrorUnqualSize: {
printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name()));
return;
}
case ErrorEmptyColumn: {
printError("At least one of selected column is empty");
return;
}
case NoError:
break;
}
QMapIterator iter(colName);
while (iter.hasNext()) {
iter.next();
if (iter.value() == 1)
col1Name = baseColName + " " + iter.key();
else
col2Name = baseColName + " " + iter.key();
}
}
}
QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std",
col1Name, n[0], sum[0], mean[0], std[0],
col2Name, n[1], sum[1], mean[1], std[1]
};
m_statsTable = getHtmlTable(3, 5, rowMajor);
for (int i = 0; i < 2; i++) {
if (n[i] == 0) {
if (calculateStats)
printError("At least two values should be there in every column");
else
printError("In Statistic Table you filled above, value of N for every row should be > 0");
return;
}
if (std[i] == 0.0) {
printError( i18n("Standard Deviation of at least one column is equal to 0: last column is: %1", m_columns[i]->name()));
return;
}
}
double stdSq[2] = {gsl_pow_2(std[0]), gsl_pow_2(std[1])};
QString testName;
int df = 0;
double spSq = 0;
switch (testType(test)) {
case TTest: {
testName = "T";
if (equalVariance) {
df = n[0] + n[1] - 2;
spSq = ((n[0]-1) * stdSq[0] +
(n[1]-1) * stdSq[1] ) / df;
// QDEBUG("equal variance : spSq is " << spSq);
m_statisticValue.append((mean[0] - mean[1]) / sqrt(spSq / n[0] + spSq / n[1]));
printLine(9, "Assumption: Equal Variance b/w both population means");
} else {
double temp_val;
temp_val = gsl_pow_2( gsl_pow_2(std[0]) / n[0] + gsl_pow_2(std[1]) / n[1]);
temp_val = temp_val / ( (gsl_pow_2( (gsl_pow_2(std[0]) / n[0]) ) / (n[0]-1)) +
(gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1)));
df = qRound(temp_val);
m_statisticValue.append((mean[0] - mean[1]) / (sqrt( (gsl_pow_2(std[0])/n[0]) +
(gsl_pow_2(std[1])/n[1]))));
printLine(9, "Assumption: UnEqual Variance b/w both population means");
}
printLine(6, i18n("Degree of Freedom is %1", df), "green");
printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate"));
printLine(8, "Assumption: Both Populations approximately follow normal distribution");
break;
}
case ZTest: {
testName = "Z";
spSq = stdSq[0] / n[0] + stdSq[1] / n[1];
double zValue = (mean[0] - mean[1]) / sqrt(spSq);
m_statisticValue.append(zValue);
break;
}
}
m_currTestName = "" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "
";
m_pValue.append(getPValue(test, m_statisticValue[0], col1Name, col2Name, df));
printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue");
printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green");
printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName));
printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green");
printTooltip(5, getPValueTooltip(m_pValue[0]));
return;
}
/********************************Two Sample Paired ***************************************/
void HypothesisTest::performTwoSamplePairedTest(int test) {
if (m_columns.size() != 2) {
printError("Inappropriate number of m_columns selected");
return;
}
for (int i = 0; i < 2; i++) {
if ( !m_columns[0]->isNumeric()) {
printError("select only m_columns with numbers");
return;
}
}
int n;
double sum, mean, std;
GeneralErrorType errorCode = findStatsPaired(m_columns[0], m_columns[1], n, sum, mean, std);
switch (errorCode) {
case ErrorUnqualSize: {
printError("both m_columns are having different sizes");
return;
}
case ErrorEmptyColumn: {
printError("m_columns are empty");
return;
}
case NoError:
break;
}
QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std",
"difference", n, sum, mean, std
};
m_statsTable = getHtmlTable(2, 5, rowMajor);
if (std == 0.0) {
printError("Standard deviation of the difference is 0");
return;
}
QString testName;
int df = 0;
switch (test) {
case TTest: {
m_statisticValue[0] = mean / (std / sqrt(n));
df = n - 1;
testName = "T";
printLine(6, i18n("Degree of Freedom is %1
name(), i18n("%1", m_populationMean), df));
m_currTestName = "" + i18n("One Sample %1 Test for %2 vs %3", testName, m_columns[0]->name(), m_columns[1]->name()) + "
";
printLine(2, i18n("Significance level is %1 ", round(m_significanceLevel)), "blue");
printLine(4, i18n("%1 Value is %2 ", testName, round(m_statisticValue[0])), "green");
printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green");
printTooltip(5, getPValueTooltip(m_pValue[0]));
return;
}
/******************************** One Sample ***************************************/
void HypothesisTest::performOneSampleTest(int test) {
if (m_columns.size() != 1) {
printError("Inappropriate number of m_columns selected");
return;
}
if ( !m_columns[0]->isNumeric()) {
printError("select only m_columns with numbers");
return;
}
int n;
double sum, mean, std;
GeneralErrorType errorCode = findStats(m_columns[0], n, sum, mean, std);
switch (errorCode) {
case ErrorEmptyColumn: {
printError("column is empty");
return;
}
case NoError:
break;
case ErrorUnqualSize: {
return;
}
}
QVariant rowMajor[] = {"", "N", "Sum", "Mean", "Std",
m_columns[0]->name(), n, sum, mean, std
};
m_statsTable = getHtmlTable(2, 5, rowMajor);
if (std == 0.0) {
printError("Standard deviation is 0");
return;
}
QString testName;
int df = 0;
switch (test) {
case TTest: {
testName = "T";
m_statisticValue.append((mean - m_populationMean) / (std / sqrt(n)));
df = n - 1;
printLine(6, i18n("Degree of Freedom is %1", df), "blue");
break;
}
case ZTest: {
testName = "Z";
df = 0;
m_statisticValue.append((mean - m_populationMean) / (std / sqrt(n)));
break;
}
}
m_pValue.append(getPValue(test, m_statisticValue[0], m_columns[0]->name(), i18n("%1",m_populationMean), df));
m_currTestName = "" + i18n("One Sample %1 Test for %2", testName, m_columns[0]->name()) + "
";
printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue");
printLine(4, i18n("%1 Value is %2", testName, round(m_statisticValue[0])), "green");
printLine(5, i18n("P Value is %1", m_pValue[0]), "green");
printTooltip(5, getPValueTooltip(m_pValue[0]));
return;
}
/*************************************One Way Anova***************************************/
// all standard variables and formulas are taken from this wikipedia page:
// https://en.wikipedia.org/wiki/One-way_analysis_of_variance
// b stands for b/w groups
// w stands for within groups
// np is number of partition i.e., number of classes
void HypothesisTest::performOneWayAnova() {
int np, totalRows;
countPartitions(m_columns[0], np, totalRows);
int* ni = new int[np];
double* sum = new double[np];
double* mean = new double[np];
double* std = new double[np];
QString* colNames = new QString[np];
QMap classnameToIndex;
QString baseColName;
if (m_columns[0]->isNumeric())
baseColName = m_columns[0]->name();
findStatsCategorical(m_columns[0], m_columns[1], ni, sum, mean, std, classnameToIndex, np, totalRows);
double yBar = 0; // overall mean
double sB = 0; // sum of squares of (mean - overall_mean) between the groups
int fB = 0; // degree of freedom between the groups
double msB = 0; // mean sum of squares between the groups
double sW = 0; // sum of squares of (value - mean of group) within the groups
int fW = 0; // degree of freedom within the group
double msW = 0; // mean sum of squares within the groups
// now finding mean of each group;
for (int i = 0; i < np; i++)
yBar += mean[i];
yBar /= np;
for (int i = 0; i < np; i++) {
sB += ni[i] * gsl_pow_2(mean[i] - yBar);
if (ni[i] > 1)
sW += gsl_pow_2(std[i])*(ni[i] - 1);
else
sW += gsl_pow_2(std[i]);
fW += ni[i] - 1;
}
fB = np - 1;
msB = sB / fB;
msW = sW / fW;
m_statisticValue.append(msB / msW);
m_pValue.append(nsl_stats_fdist_p(m_statisticValue[0], static_cast(np-1), fW));
QMapIterator i(classnameToIndex);
while (i.hasNext()) {
i.next();
colNames[i.value()-1] = baseColName + " " + i.key();
}
// now printing the statistics and result;
int rowCount = np + 1, columnCount = 5;
QVariant* rowMajor = new QVariant[rowCount*columnCount];
// header data;
rowMajor[0] = "";
rowMajor[1] = "Ni";
rowMajor[2] = "Sum";
rowMajor[3] = "Mean";
rowMajor[4] = "Std";
// table data
for (int row_i = 1; row_i < rowCount ; row_i++) {
rowMajor[row_i*columnCount] = colNames[row_i - 1];
rowMajor[row_i*columnCount + 1] = ni[row_i - 1];
rowMajor[row_i*columnCount + 2] = sum[row_i - 1];
rowMajor[row_i*columnCount + 3] = mean[row_i - 1];
rowMajor[row_i*columnCount + 4] = std[row_i - 1];
}
m_statsTable = "" + i18n("Group Summary Statistics") + "
";
m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor);
m_statsTable += getLine("");
m_statsTable += getLine("");
m_statsTable += "" + i18n("Grand Summary Statistics") + "
";
m_statsTable += getLine("");
m_statsTable += getLine(i18n("Overall Mean is %1", round(yBar)));
rowCount = 4;
columnCount = 3;
rowMajor->clear();
rowMajor[0] = "";
rowMajor[1] = "Between Groups";
rowMajor[2] = "Within Groups";
int baseIndex = 0;
baseIndex = 1 * columnCount;
rowMajor[baseIndex + 0] = "Sum of Squares";
rowMajor[baseIndex + 1] = sB;
rowMajor[baseIndex + 2] = sW;
baseIndex = 2 * columnCount;
rowMajor[baseIndex + 0] = "Degree of Freedom";
rowMajor[baseIndex + 1] = fB;
rowMajor[baseIndex + 2] = fW;
baseIndex = 3 * columnCount;
rowMajor[baseIndex + 0] = "Mean Square Value";
rowMajor[baseIndex + 1] = msB;
rowMajor[baseIndex + 2] = msW;
m_statsTable += getHtmlTable(rowCount, columnCount, rowMajor);
delete[] ni;
delete[] sum;
delete[] mean;
delete[] std;
delete[] colNames;
printLine(1, i18n("F Value is %1", round(m_statisticValue[0])), "green");
printLine(2, i18n("P Value is %1 ", m_pValue[0]), "green");
printTooltip(2, getPValueTooltip(m_pValue[0]));
return;
}
/*************************************Two Way Anova***************************************/
// all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf
//TODO: suppress warning of variable length array are a C99 feature.
//TODO: add assumptions verification option
//TODO: add tail option (if needed)
void HypothesisTest::performTwoWayAnova() {
int np_a, totalRows_a;
int np_b, totalRows_b;
countPartitions(m_columns[0], np_a, totalRows_a);
countPartitions(m_columns[1], np_b, totalRows_b);
QVector> groupMean(np_a, QVector(np_b));
QVector> replicates(np_a, QVector(np_b));
for (int i = 0; i < np_a; i++)
for (int j = 0; j < np_b; j++) {
groupMean[i][j] = 0;
replicates[i][j] = 0;
}
if (totalRows_a != totalRows_b) {
printError("There is missing data in at least one of the rows");
return;
}
QMap catToNumber_a;
QMap catToNumber_b;
int partitionNumber_a = 1;
int partitionNumber_b = 1;
for (int i = 0; i < totalRows_a; i++) {
QString name_a = m_columns[0]->textAt(i);
QString name_b = m_columns[1]->textAt(i);
double value = m_columns[2]->valueAt(i);
if (catToNumber_a[name_a] == 0) {
catToNumber_a[name_a] = partitionNumber_a;
partitionNumber_a++;
}
if (catToNumber_b[name_b] == 0) {
catToNumber_b[name_b] = partitionNumber_b;
partitionNumber_b++;
}
groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += value;
replicates[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1] += 1;
}
int replicate = replicates[0][0];
for (int i = 0; i < np_a; i++)
for (int j = 0; j < np_b; j++) {
if (replicates[i][j] == 0) {
printError("Dataset should have at least one data value corresponding to each feature combination");
return;
}
if (replicates[i][j] != replicate) {
printError("Number of experiments perfomed for each combination of levels
"
"between Independet Var.1 and Independent Var.2 must be equal");
return;
}
groupMean[i][j] /= replicates[i][j];
}
double ss_within = 0;
for (int i = 0; i < totalRows_a; i++) {
QString name_a = m_columns[0]->textAt(i);
QString name_b = m_columns[1]->textAt(i);
double value = m_columns[2]->valueAt(i);
ss_within += gsl_pow_2(value - groupMean[catToNumber_a[name_a] - 1][catToNumber_b[name_b] - 1]);
}
int df_within = (replicate - 1) * np_a * np_b;
double ms_within = ss_within / df_within;
double* mean_a = new double[np_a];
double* mean_b = new double[np_b];
for (int i = 0; i < np_a; i++) {
for (int j = 0; j < np_b; j++) {
mean_a[i] += groupMean[i][j] / np_b;
mean_b[j] += groupMean[i][j] / np_a;
}
}
double mean = 0;
for (int i = 0; i < np_a; i++)
mean += mean_a[i] / np_a;
double ss_a = 0;
for (int i = 0; i < np_a; i++)
ss_a += gsl_pow_2(mean_a[i] - mean);
ss_a *= replicate * np_b;
int df_a = np_a - 1;
double ms_a = ss_a / df_a;
double ss_b = 0;
for (int i = 0; i < np_b; i++)
ss_b += gsl_pow_2(mean_b[i] - mean);
ss_b *= replicate * np_a;
int df_b = np_b - 1;
double ms_b = ss_b / df_b;
double ss_interaction = 0;
for (int i = 0; i < np_a; i++)
for (int j = 0; j < np_b; j++)
ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean);
ss_interaction *= replicate;
int df_interaction = (np_a - 1) * (np_b - 1);
double ms_interaction = ss_interaction / df_interaction;
QString* partitionNames_a = new QString[np_a];
QString* partitionNames_b = new QString[np_b];
QMapIterator itr_a(catToNumber_a);
while (itr_a.hasNext()) {
itr_a.next();
partitionNames_a[itr_a.value()-1] = itr_a.key();
}
QMapIterator itr_b(catToNumber_b);
while (itr_b.hasNext()) {
itr_b.next();
partitionNames_b[itr_b.value()-1] = itr_b.key();
}
// printing table;
// HtmlCell constructor structure; data, level, rowSpanCount, m_columnspanCount, isHeader;
QList rowMajor;
rowMajor.append(new HtmlCell("", 0, true, "", 2, 1));
for (int i = 0; i < np_b; i++)
rowMajor.append(new HtmlCell(partitionNames_b[i], 0, true, "", 1, 2));
rowMajor.append(new HtmlCell("Mean", 0, true, "", 2));
for (int i = 0; i < np_b; i++) {
rowMajor.append(new HtmlCell("Mean", 1, true));
rowMajor.append(new HtmlCell("Replicate", 1, true));
}
int level = 2;
for (int i = 0; i < np_a; i++) {
rowMajor.append(new HtmlCell(partitionNames_a[i], level, true));
for (int j = 0; j < np_b; j++) {
rowMajor.append(new HtmlCell(round(groupMean[i][j]), level));
rowMajor.append(new HtmlCell(replicates[i][j], level));
}
rowMajor.append(new HtmlCell(round(mean_a[i]), level));
level++;
}
rowMajor.append(new HtmlCell("Mean", level, true));
for (int i = 0; i < np_b; i++)
rowMajor.append(new HtmlCell(round(mean_b[i]), level, false, "", 1, 2));
rowMajor.append(new HtmlCell(round(mean), level));
m_statsTable = "" + i18n("Contingency Table") + "
";
m_statsTable += getHtmlTable3(rowMajor);
m_statsTable += "";
m_statsTable += "" + i18n("results table") + "
";
rowMajor.clear();
level = 0;
rowMajor.append(new HtmlCell("", level, true));
rowMajor.append(new HtmlCell("SS", level, true));
rowMajor.append(new HtmlCell("DF", level, true, "degree of freedom"));
rowMajor.append(new HtmlCell("MS", level, true));
level++;
rowMajor.append(new HtmlCell(m_columns[0]->name(), level, true));
rowMajor.append(new HtmlCell(round(ss_a), level));
rowMajor.append(new HtmlCell(df_a, level));
rowMajor.append(new HtmlCell(round(ms_a), level));
level++;
rowMajor.append(new HtmlCell(m_columns[1]->name(), level, true));
rowMajor.append(new HtmlCell(round(ss_b), level));
rowMajor.append(new HtmlCell(df_b, level));
rowMajor.append(new HtmlCell(round(ms_b), level));
level++;
rowMajor.append(new HtmlCell("Interaction", level, true));
rowMajor.append(new HtmlCell(round(ss_interaction), level));
rowMajor.append(new HtmlCell(df_interaction, level));
rowMajor.append(new HtmlCell(round(ms_interaction), level));
level++;
rowMajor.append(new HtmlCell("Within", level, true));
rowMajor.append(new HtmlCell(round(ss_within), level));
rowMajor.append(new HtmlCell(df_within, level));
rowMajor.append(new HtmlCell(round(ms_within), level));
m_statsTable += getHtmlTable3(rowMajor);
double fValue_a = ms_a / ms_within;
double fValue_b = ms_b / ms_within;
double fValue_interaction = ms_interaction / ms_within;
double m_pValue_a = nsl_stats_fdist_p(fValue_a, static_cast(np_a - 1), df_a);
double m_pValue_b = nsl_stats_fdist_p(fValue_b, static_cast(np_b - 1), df_b);
printLine(0, "F(df" + m_columns[0]->name() + ", dfwithin) is " + round(fValue_a), "blue");
printLine(1, "F(df" + m_columns[1]->name() + ", dfwithin) is " + round(fValue_b), "blue");
printLine(2, "F(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue");
printLine(4, "P(df" + m_columns[0]->name() + ", dfwithin) is " + round(m_pValue_a), "blue");
printLine(5, "P(df" + m_columns[1]->name() + ", dfwithin) is " + round(m_pValue_b), "blue");
// printLine(2, "P(dfinteraction, dfwithin) is " + round(fValue_interaction), "blue");
m_statisticValue.append(fValue_a);
m_statisticValue.append(fValue_b);
m_statisticValue.append(fValue_interaction);
m_pValue.append(m_pValue_a);
m_pValue.append(m_pValue_b);
delete[] mean_a;
delete[] mean_b;
delete[] partitionNames_a;
delete[] partitionNames_b;
return;
}
/**************************************Levene Test****************************************/
// Some reference to local variables.
// np = number of partitions
// df = degree of fredom
// totalRows = total number of rows in column
// these variables are taken from: https://en.wikipedia.org/wiki/Levene%27s_test
// yiBar = mean of ith group;
// Zij = |Yij - yiBar|
// ziBar = mean of Zij for group i
// ziBarBar = mean for all zij
// ni = number of elements in group i
void HypothesisTest::m_performLeveneTest(bool categoricalVariable) {
if (m_columns.size() != 2) {
printError("Inappropriate number of m_columns selected");
return;
}
int np = 0;
int n = 0;
if (!categoricalVariable && m_columns[0]->isNumeric())
np = m_columns.size();
else
countPartitions(m_columns[0], np, n);
if (np < 2) {
printError("Select at least two m_columns / classes");
return;
}
double* yiBar = new double[np];
double* ziBar = new double[np];
double ziBarBar = 0;
double* ni = new double[np];
for (int i = 0; i < np; i++) {
yiBar[i] = 0;
ziBar[i] = 0;
ni[i] = 0;
}
double fValue;
int df = 0;
int totalRows = 0;
QString* colNames = new QString[np];
if (!categoricalVariable && m_columns[0]->isNumeric()) {
totalRows = m_columns[0]->rowCount();
double value = 0;
for (int j = 0; j < totalRows; j++) {
int numberNaNCols = 0;
for (int i = 0; i < np; i++) {
value = m_columns[i]->valueAt(j);
if (std::isnan(value)) {
numberNaNCols++;
continue;
}
yiBar[i] += value;
ni[i]++;
n++;
}
if (numberNaNCols == np) {
totalRows = j;
break;
}
}
for (int i = 0; i < np; i++) {
if (ni[i] > 0)
yiBar[i] /= ni[i];
else {
printError("One of the selected m_columns is empty
"
"or have choosen Independent Var.1 wrongly");
return;
}
}
for (int j = 0; j < totalRows; j++) {
for (int i = 0; i < np; i++) {
value = m_columns[i]->valueAt(j);
if (!(std::isnan(value)))
ziBar[i] += fabs(value - yiBar[i]);
}
}
for (int i = 0; i < np; i++) {
ziBarBar += ziBar[i];
if (ni[i] > 0)
ziBar[i] /= ni[i];
}
ziBarBar /= n;
double numberatorValue = 0;
double denominatorValue = 0;
for (int j = 0; j < totalRows; j++) {
for (int i = 0; i < np; i++) {
value = m_columns[i]->valueAt(j);
if (!(std::isnan(value))) {
double zij = fabs(value - yiBar[i]);
denominatorValue += gsl_pow_2( (zij - ziBar[i]));
}
}
}
if (denominatorValue == 0.0) {
printError( i18n("Denominator value is %1", denominatorValue));
return;
}
for (int i = 0; i < np; i++) {
colNames[i] = m_columns[i]->name();
numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar));
}
fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue);
} else {
QMap classnameToIndex;
AbstractColumn::ColumnMode originalColMode = m_columns[0]->columnMode();
m_columns[0]->setColumnMode(AbstractColumn::Text);
int partitionNumber = 1;
QString name;
double value;
int classIndex;
for (int j = 0; j < n; j++) {
name = m_columns[0]->textAt(j);
value = m_columns[1]->valueAt(j);
if (std::isnan(value)) {
n = j;
break;
}
if (classnameToIndex[name] == 0) {
classnameToIndex[name] = partitionNumber;
partitionNumber++;
}
classIndex = classnameToIndex[name]-1;
ni[classIndex]++;
yiBar[classIndex] += value;
}
for (int i = 0; i < np; i++) {
if (ni[i] > 0)
yiBar[i] /= ni[i];
else {
printError("One of the selected m_columns is empty
"
"or have choosen Independent Var.1 wrongly");
m_columns[0]->setColumnMode(originalColMode);
return;
}
}
for (int j = 0; j < n; j++) {
name = m_columns[0]->textAt(j);
value = m_columns[1]->valueAt(j);
classIndex = classnameToIndex[name] - 1;
ziBar[classIndex] += fabs(value - yiBar[classIndex]);
}
for (int i = 0; i < np; i++) {
ziBarBar += ziBar[i];
ziBar[i] /= ni[i];
}
ziBarBar /= n;
double numberatorValue = 0;
double denominatorValue = 0;
for (int j = 0; j < n; j++) {
name = m_columns[0]->textAt(j);
value = m_columns[1]->valueAt(j);
classIndex = classnameToIndex[name] - 1;
double zij = fabs(value - yiBar[classIndex]);
denominatorValue += gsl_pow_2( (zij - ziBar[classIndex]));
}
for (int i = 0; i < np; i++)
numberatorValue += ni[i]*gsl_pow_2( (ziBar[i]-ziBarBar));
if (denominatorValue == 0.0) {
printError( "number of data points is less or than equal to number of categorical variables");
m_columns[0]->setColumnMode(originalColMode);
return;
}
fValue = ((n - np) / (np - 1)) * (numberatorValue / denominatorValue);
QMapIterator i(classnameToIndex);
while (i.hasNext()) {
i.next();
colNames[i.value()-1] = m_columns[0]->name() + " " + i.key();
}
m_columns[0]->setColumnMode(originalColMode);
}
df = n - np;
// now making the stats table.
int rowCount = np+1;
int columnCount = 4;
QVariant* rowMajor = new QVariant[rowCount*columnCount];
// header data;
rowMajor[0] = "";
rowMajor[1] = "Ni";
rowMajor[2] = "yiBar";
rowMajor[3] = "ziBar";
// table data
for (int row_i = 1; row_i < rowCount; row_i++) {
rowMajor[row_i*columnCount] = colNames[row_i-1];
rowMajor[row_i*columnCount + 1] = ni[row_i-1];
rowMajor[row_i*columnCount + 2] = yiBar[row_i-1];
rowMajor[row_i*columnCount + 3] = ziBar[row_i-1];
}
m_statsTable = getHtmlTable(rowCount, columnCount, rowMajor);
delete[] rowMajor;
delete[] yiBar;
delete[] ziBar;
delete[] ni;
m_pValue.append(nsl_stats_fdist_p(fValue, static_cast(np-1), df));
printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue");
printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue");
printLine(2, i18n("Significance level is %1", round(m_significanceLevel)), "blue");
printLine(4, i18n("F Value is %1 ", round(fValue)), "green");
printLine(5, i18n("P Value is %1 ", m_pValue[0]), "green");
printLine(6, i18n("Degree of Freedom is %1", df), "green");
printTooltip(5, getPValueTooltip(m_pValue[0]));
if (m_pValue[0] <= m_significanceLevel)
printLine(8, "Requirement for homogeneity is not met", "red");
else
printLine(8, "Requirement for homogeneity is met", "green");
m_statisticValue.append(fValue);
return;
}
//TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol;
// TODO: check for correctness between: for TestZ with TailTwo
// m_pValue.append(2*gsl_cdf_tdist_P(value, df) v/s
// m_pValue.append(gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df);
double HypothesisTest::getPValue(const int& test, double& value, const QString& col1Name, const QString& col2Name, const int df) {
double pValue = 0;
QString nullHypothesisSign;
QString alternateHypothesisSign;
switch (testType(test)) {
case TTest: {
switch (m_tail) {
case Negative:
pValue = gsl_cdf_tdist_P(value, df);
nullHypothesisSign = UTF8_QSTRING("≥");
alternateHypothesisSign = UTF8_QSTRING("⋖");
break;
case Positive:
value *= -1;
pValue = gsl_cdf_tdist_P(value, df);
nullHypothesisSign = UTF8_QSTRING("≤");
alternateHypothesisSign = UTF8_QSTRING(">");
break;
case Two:
pValue = 2.*gsl_cdf_tdist_P(-fabs(value), df);
nullHypothesisSign = UTF8_QSTRING("=");
alternateHypothesisSign = UTF8_QSTRING("≠");
break;
}
break;
}
case ZTest: {
switch (m_tail) {
case Negative:
pValue = gsl_cdf_ugaussian_P(value);
nullHypothesisSign = UTF8_QSTRING("≥");
alternateHypothesisSign = UTF8_QSTRING("⋖");
break;
case Positive:
value *= -1;
pValue = gsl_cdf_ugaussian_P(value);
nullHypothesisSign = UTF8_QSTRING("≤");
alternateHypothesisSign = UTF8_QSTRING(">");
break;
case Two:
pValue = 2*gsl_cdf_ugaussian_P(-fabs(value));
nullHypothesisSign = UTF8_QSTRING("=");
alternateHypothesisSign = UTF8_QSTRING("≠");
break;
}
break;
}
}
printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, nullHypothesisSign, col2Name), "blue");
printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, alternateHypothesisSign, col2Name), "blue");
if (pValue > 1)
return 1;
return pValue;
}
QString HypothesisTest::getPValueTooltip(const double &pValue) {
if (pValue <= m_significanceLevel)
return i18n("We can safely reject Null Hypothesis for significance level %1", round(m_significanceLevel));
return i18n("There is a plausibility for Null Hypothesis to be true");
}
// Virtual functions
QWidget* HypothesisTest::view() const {
if (!m_partView) {
m_view = new HypothesisTestView(const_cast(this));
m_partView = m_view;
}
return m_partView;
}