diff --git a/src/backend/hypothesis_test/HypothesisTest.cpp b/src/backend/hypothesis_test/HypothesisTest.cpp index cdc333055..ce3b5939b 100644 --- a/src/backend/hypothesis_test/HypothesisTest.cpp +++ b/src/backend/hypothesis_test/HypothesisTest.cpp @@ -1,1015 +1,1015 @@ /*************************************************************************** File : HypothesisTest.cpp Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "HypothesisTest.h" #include "HypothesisTestPrivate.h" #include "kdefrontend/hypothesis_test/HypothesisTestView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" #include "QDebug" extern "C" { #include "backend/nsl/nsl_stats.h" } #include #include #include #include #include #include #include #include HypothesisTest::HypothesisTest(const QString &name) : AbstractPart(name), d(new HypothesisTestPrivate(this)) { } HypothesisTest::~HypothesisTest() { delete d; } QAbstractItemModel* HypothesisTest::dataModel() { return d->dataModel; } QAbstractItemModel* HypothesisTest::horizontalHeaderModel() { return d->horizontalHeaderModel; } QAbstractItemModel* HypothesisTest::verticalHeaderModel() { return d->verticalHeaderModel; } QAbstractItemModel* HypothesisTest::resultModel() { return d->resultModel; } void HypothesisTest::setDataSourceType(DataSourceType type) { if (type != d->dataSourceType) { d->dataSourceType = type; } } void HypothesisTest::setDataSourceSpreadsheet(Spreadsheet *spreadsheet) { if (spreadsheet != d->dataSourceSpreadsheet) d->setDataSourceSpreadsheet(spreadsheet); } QStringList HypothesisTest::allColumns() { return d->all_columns; } HypothesisTest::TailType HypothesisTest::tailType() { return d->tail_type; } void HypothesisTest::setPopulationMean(QVariant populationMean) { d->m_population_mean = populationMean.toDouble(); } void HypothesisTest::setSignificanceLevel(QVariant alpha) { d->m_significance_level = alpha.toDouble(); } void HypothesisTest::setTailType(HypothesisTest::TailType tailType) { d->tail_type = tailType; } void HypothesisTest::setColumns(QVector cols) { d->m_columns = cols; } void HypothesisTest::setColumns(QStringList cols) { return d->setColumns(cols); } HypothesisTest::DataSourceType HypothesisTest::dataSourceType() const { return d->dataSourceType; } void HypothesisTest::performTwoSampleTTest() { } void HypothesisTest::performTwoSampleIndependentTTest(bool equal_variance) { d->performTwoSampleIndependentTest(HypothesisTestPrivate::TestT, equal_variance); } void HypothesisTest::performTwoSamplePairedTTest() { d->performTwoSamplePairedTest(HypothesisTestPrivate::TestT); } void HypothesisTest::PerformOneSampleTTest() { d->PerformOneSampleTest(HypothesisTestPrivate::TestT); } void HypothesisTest::performTwoSampleIndependentZTest() { d->performTwoSampleIndependentTest(HypothesisTestPrivate::TestZ); } void HypothesisTest::performTwoSamplePairedZTest() { d->performTwoSamplePairedTest(HypothesisTestPrivate::TestZ); } void HypothesisTest::PerformOneSampleZTest() { d->PerformOneSampleTest(HypothesisTestPrivate::TestZ); } QString HypothesisTest::testName() { return d->m_currTestName; } QString HypothesisTest::statsTable() { return d->m_stats_table; } /****************************************************************************** * Private Implementations * ****************************************************************************/ HypothesisTestPrivate::HypothesisTestPrivate(HypothesisTest* owner) : q(owner) , dataModel(new QStandardItemModel) , horizontalHeaderModel(new QStandardItemModel) , verticalHeaderModel(new QStandardItemModel) , resultModel(new QStandardItemModel()){ } HypothesisTestPrivate::~HypothesisTestPrivate() { } void HypothesisTestPrivate::setDataSourceSpreadsheet(Spreadsheet *spreadsheet) { dataSourceSpreadsheet = spreadsheet; //setting rows and columns count; m_rowCount = dataSourceSpreadsheet->rowCount(); m_columnCount = dataSourceSpreadsheet->columnCount(); for (auto* col : dataSourceSpreadsheet->children()) { all_columns << col->name(); } } void HypothesisTestPrivate::setColumns(QStringList cols) { m_columns.clear(); Column* column = new Column("column"); for (QString col : cols) { if (col != "") { column = dataSourceSpreadsheet->column(col); m_columns.append(column); } } } /**************************Two Sample Independent *************************************/ void HypothesisTestPrivate::performTwoSampleIndependentTest(TestType test, bool equal_variance) { dataModel->clear(); horizontalHeaderModel->clear(); verticalHeaderModel->clear(); resultModel->clear(); QMessageBox* msg_box = new QMessageBox(); // checking for cols; if (m_columns.size() != 2) { msg_box->setText(i18n("Inappropriate number of columns selected")); msg_box->exec(); return; } int n[2]; double sum[2], mean[2], std[2]; QString col1_name = m_columns[0]->name(); QString col2_name = m_columns[1]->name(); if (m_columns[0]->columnMode() == AbstractColumn::Integer || m_columns[0]->columnMode() == AbstractColumn::Numeric) { for (int i = 0; i < 2; i++) { findStats(m_columns[i], n[i], sum[i], mean[i], std[i]); if (n[i] < 1) { msg_box->setText(i18n("atleast one of selected column is empty")); msg_box->exec(); return; } } } else { findStatsCategorical(n, sum, mean, std, col1_name, col2_name); if (n[0] == -1) { msg_box->setText(i18n("Unequal size between %1 and %2", m_columns[0]->name(), m_columns[1]->name())); msg_box->exec(); return; } else if(n[0] == -2) { msg_box->setText(i18n("There are more than two categorical variables in %1", m_columns[0]->name())); msg_box->exec(); return; } else if (n[0] == 0) { msg_box->setText(i18n("atleast one of selected column is empty")); msg_box->exec(); return; } } QVariant row_major[] = {"", "N", "Sum", "Mean", "Std", col1_name, n[0], sum[0], mean[0], std[0], col2_name, n[1], sum[1], mean[1], std[1]}; m_stats_table = getHtmlTable(3, 5, row_major); switch (test) { case TestT: { m_currTestName = i18n("Two Sample Independent T Test for %1 vs %2", col1_name, col2_name); double t; int df; - QString temp = ""; + QString temp_msg = ""; double p_value = 0; resultModel->setRowCount(9); resultModel->setColumnCount(1); if (equal_variance) { df = n[0] + n[1] - 2; //Assuming equal variance double sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); t = (mean[0] - mean[1])/(sp*qSqrt(1.0/n[0] + 1.0/n[1])); resultModel->setData(resultModel->index(8, 0), i18n("Assumption: Equal Variance between both populations"), Qt::DisplayRole); } else { - double temp; - temp = qPow( qPow(std[0], 2)/n[0] + qPow(std[1], 2)/n[1], 2); - temp = temp / ( (qPow( (qPow(std[0], 2)/n[0]), 2)/(n[0]-1)) + (qPow( (qPow(std[1], 2)/n[1]), 2)/(n[1]-1))); - df = qRound(temp); + double temp_val; + temp_val = qPow( qPow(std[0], 2)/n[0] + qPow(std[1], 2)/n[1], 2); + temp_val = temp_val / ( (qPow( (qPow(std[0], 2)/n[0]), 2)/(n[0]-1)) + (qPow( (qPow(std[1], 2)/n[1]), 2)/(n[1]-1))); + df = qRound(temp_val); t = (mean[0] - mean[1]) / (qSqrt( (qPow(std[0], 2)/n[0]) + (qPow(std[1], 2)/n[1]))); resultModel->setData(resultModel->index(8, 0), i18n("Assumption: Non-Equal Variance between both populations"), Qt::DisplayRole); } switch (tail_type) { case HypothesisTest::TailNegative: p_value = nsl_stats_tdist_p(t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2265), col2_name); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2265), col2_name); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3C), col2_name); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3C), col2_name); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailPositive: t *= -1; p_value = nsl_stats_tdist_p(t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2264), col2_name); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2264), col2_name); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3E), col2_name); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3E), col2_name); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailTwo: p_value = nsl_stats_tdist_p(t, df) + nsl_stats_tdist_p(-1*t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3D), col2_name); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3D), col2_name); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2260), col2_name); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2260), col2_name); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; } resultModel->setData(resultModel->index(3, 0), i18n("T value is %1", t), Qt::DisplayRole); resultModel->setData(resultModel->index(4, 0), i18n("P value is %1", p_value), Qt::DisplayRole); resultModel->setData(resultModel->index(5, 0), i18n("DoF is %1", df), Qt::DisplayRole); resultModel->setData(resultModel->index(6, 0), i18n("Significance level is %1", m_significance_level), Qt::DisplayRole); if (p_value <= m_significance_level) - temp = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); + temp_msg = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); else - temp = i18n("There is a plausibility for Null Hypothesis to be true"); + temp_msg = i18n("There is a plausibility for Null Hypothesis to be true"); - resultModel->setData(resultModel->index(4, 0), temp, Qt::ToolTipRole); + resultModel->setData(resultModel->index(4, 0), temp_msg, Qt::ToolTipRole); break; } case TestZ: { m_currTestName = i18n("Two Sample Independent T Test for %1 vs %2", col1_name, col2_name); double t; int df; - QString temp = ""; + QString temp_msg = ""; double p_value = 0; m_currTestName = i18n("Two Sample Independent Z Test for %1 vs %2", col1_name, col2_name); resultModel->setRowCount(8); resultModel->setColumnCount(1); df = n[0] + n[1] - 2; //Assuming equal variance double sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); t = (mean[0] - mean[1])/(sp*qSqrt(1.0/n[0] + 1.0/n[1])); switch (tail_type) { case HypothesisTest::TailNegative: p_value = nsl_stats_tdist_p(t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2265), col2_name); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2265), col2_name); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3C), col2_name); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3C), col2_name); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailPositive: t *= -1; p_value = nsl_stats_tdist_p(t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2264), col2_name); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2264), col2_name); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3E), col2_name); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3E), col2_name); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailTwo: p_value = nsl_stats_tdist_p(t, df) + nsl_stats_tdist_p(-1*t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3D), col2_name); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x3D), col2_name); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2260), col2_name); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", col1_name, QChar(0x2260), col2_name); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; } resultModel->setData(resultModel->index(3, 0), i18n("Z value is %1", t), Qt::DisplayRole); resultModel->setData(resultModel->index(4, 0), i18n("P value is %1", p_value), Qt::DisplayRole); resultModel->setData(resultModel->index(5, 0), i18n("Significance level is %1", m_significance_level), Qt::DisplayRole); resultModel->setData(resultModel->index(7, 0), i18n("Assumption: Central Limit Theorem is Valid"), Qt::DisplayRole); if (p_value <= m_significance_level) - temp = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); + temp_msg = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); else - temp = i18n("There is a plausibility for Null Hypothesis to be true"); + temp_msg = i18n("There is a plausibility for Null Hypothesis to be true"); // tool tips - resultModel->setData(resultModel->index(4, 0), temp, Qt::ToolTipRole); + resultModel->setData(resultModel->index(4, 0), temp_msg, Qt::ToolTipRole); // resultModel->setData(resultModel->index(0, 0), QIcon("open.xpm"), Qt::DecorationRole); break; } } emit q->changed(); return; } /********************************Two Sample Paired ***************************************/ void HypothesisTestPrivate::performTwoSamplePairedTest(TestType test) { dataModel->clear(); horizontalHeaderModel->clear(); verticalHeaderModel->clear(); resultModel->clear(); QMessageBox* msg_box = new QMessageBox(); // checking for cols; if (m_columns.size() != 2) { msg_box->setText(i18n("Inappropriate number of columns selected")); msg_box->exec(); return; } bool modeOk = true; for (int i = 0; i < 2; i++) { if(m_columns[i]->columnMode() == AbstractColumn::Numeric || m_columns[i]->columnMode() == AbstractColumn::Integer) continue; modeOk = false; } if (!modeOk) { msg_box->setText(i18n("select only columns with numbers")); msg_box->exec(); return; } int n; double sum, mean, std; findStatsPaired(m_columns[0], m_columns[1], n, sum, mean, std); if (n == -1) { msg_box->setText(i18n("both columns are having different sizes")); msg_box->exec(); return; } if (n < 1) { msg_box->setText(i18n("columns are empty")); msg_box->exec(); return; } QVariant row_major[] = {"", "N", "Sum", "Mean", "Std", "difference", n, sum, mean, std}; m_stats_table = getHtmlTable(2, 5, row_major); if (test == TestT) { m_currTestName = i18n("Two Sample Paired T Test for %1 vs %2", m_columns[0]->name(), m_columns[1]->name()); double t; int df; - QString temp = ""; + QString temp_msg = ""; double p_value = 0; resultModel->setRowCount(7); resultModel->setColumnCount(1); t = mean / (std/qSqrt(n)); df = n - 1; switch (tail_type) { case HypothesisTest::TailNegative: p_value = nsl_stats_tdist_p(t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2265), m_columns[1]->name()); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2265), m_columns[1]->name()); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3C), m_columns[1]->name()); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3C), m_columns[1]->name()); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailPositive: t *= -1; p_value = nsl_stats_tdist_p(t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2264), m_columns[1]->name()); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2264), m_columns[1]->name()); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3E), m_columns[1]->name()); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3E), m_columns[1]->name()); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailTwo: p_value = nsl_stats_tdist_p(t, df) + nsl_stats_tdist_p(-1*t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3D), m_columns[1]->name()); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3D), m_columns[1]->name()); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2260), m_columns[1]->name()); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2260), m_columns[1]->name()); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; } resultModel->setData(resultModel->index(3, 0), i18n("T value is %1", t), Qt::DisplayRole); resultModel->setData(resultModel->index(4, 0), i18n("P value is %1", p_value), Qt::DisplayRole); resultModel->setData(resultModel->index(5, 0), i18n("DoF is %1", df), Qt::DisplayRole); resultModel->setData(resultModel->index(6, 0), i18n("Significance level is %1", m_significance_level), Qt::DisplayRole); if (p_value <= m_significance_level) - temp = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); + temp_msg = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); else - temp = i18n("There is a plausibility for Null Hypothesis to be true"); + temp_msg = i18n("There is a plausibility for Null Hypothesis to be true"); // tool tips - resultModel->setData(resultModel->index(4, 0), temp, Qt::ToolTipRole); + resultModel->setData(resultModel->index(4, 0), temp_msg, Qt::ToolTipRole); // resultModel->setData(resultModel->index(0, 0), QIcon("open.xpm"), Qt::DecorationRole); emit q->changed(); return; } else if (test == TestZ) { m_currTestName = i18n("Two Sample Paired Z Test for %1 vs %2", m_columns[0]->name(), m_columns[1]->name()); double z; int df; - QString temp = ""; + QString temp_msg = ""; double p_value = 0; resultModel->setRowCount(8); resultModel->setColumnCount(1); z = mean / (std/qSqrt(n)); df = n - 1; switch (tail_type) { case HypothesisTest::TailNegative: p_value = nsl_stats_tdist_p(z, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2265), m_columns[1]->name()); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2265), m_columns[1]->name()); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3C), m_columns[1]->name()); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3C), m_columns[1]->name()); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailPositive: z *= -1; p_value = nsl_stats_tdist_p(z, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2264), m_columns[1]->name()); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2264), m_columns[1]->name()); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3E), m_columns[1]->name()); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3E), m_columns[1]->name()); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailTwo: p_value = nsl_stats_tdist_p(z, df) + nsl_stats_tdist_p(-1*z, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3D), m_columns[1]->name()); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x3D), m_columns[1]->name()); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2260), m_columns[1]->name()); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 Population mean of %3", m_columns[0]->name(), QChar(0x2260), m_columns[1]->name()); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; } resultModel->setData(resultModel->index(3, 0), i18n("Z value is %1", z), Qt::DisplayRole); resultModel->setData(resultModel->index(4, 0), i18n("P value is %1", p_value), Qt::DisplayRole); resultModel->setData(resultModel->index(5, 0), i18n("Significance level is %1", m_significance_level), Qt::DisplayRole); resultModel->setData(resultModel->index(7, 0), i18n("Assumption: Central Limit Theorem is Valid"), Qt::DisplayRole); if (p_value <= m_significance_level) - temp = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); + temp_msg = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); else - temp = i18n("There is a plausibility for Null Hypothesis to be true"); + temp_msg = i18n("There is a plausibility for Null Hypothesis to be true"); // tool tips - resultModel->setData(resultModel->index(4, 0), temp, Qt::ToolTipRole); + resultModel->setData(resultModel->index(4, 0), temp_msg, Qt::ToolTipRole); // resultModel->setData(resultModel->index(0, 0), QIcon("open.xpm"), Qt::DecorationRole); emit q->changed(); return; } } /******************************** One Sample ***************************************/ void HypothesisTestPrivate::PerformOneSampleTest(TestType test) { dataModel->clear(); horizontalHeaderModel->clear(); verticalHeaderModel->clear(); resultModel->clear(); QMessageBox* msg_box = new QMessageBox(); // checking for cols; if (m_columns.size() != 1) { msg_box->setText(i18n("Inappropriate number of columns selected")); msg_box->exec(); return; } if ( !(m_columns[0]->columnMode() == AbstractColumn::Numeric || m_columns[0]->columnMode() == AbstractColumn::Integer)) { msg_box->setText(i18n("select only columns with numbers")); msg_box->exec(); return; } int n; double sum, mean, std; findStats(m_columns[0], n, sum, mean, std); if (n < 1) { msg_box->setText(i18n("column is empty")); msg_box->exec(); return; } QVariant row_major[] = {"", "N", "Sum", "Mean", "Std", m_columns[0]->name(), n, sum, mean, std}; m_stats_table = getHtmlTable(2, 5, row_major); if (test == TestT) { m_currTestName = i18n("One Sample T Test for %1", m_columns[0]->name()); double t; int df; - QString temp = ""; + QString temp_msg = ""; double p_value = 0; resultModel->setRowCount(7); resultModel->setColumnCount(1); t = (mean - m_population_mean) / (std/qSqrt(n)); df = n - 1; switch (tail_type) { case HypothesisTest::TailNegative: p_value = nsl_stats_tdist_p(t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2265), m_population_mean); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2265), m_population_mean); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3C), m_population_mean); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3C), m_population_mean); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailPositive: t *= -1; p_value = nsl_stats_tdist_p(t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2264), m_population_mean); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2264), m_population_mean); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3E), m_population_mean); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3E), m_population_mean); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailTwo: p_value = nsl_stats_tdist_p(t, df) + nsl_stats_tdist_p(-1*t, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3D), m_population_mean); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3D), m_population_mean); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2260), m_population_mean); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2260), m_population_mean); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; } resultModel->setData(resultModel->index(3, 0), i18n("Z value is %1", t), Qt::DisplayRole); resultModel->setData(resultModel->index(4, 0), i18n("P value is %1", p_value), Qt::DisplayRole); resultModel->setData(resultModel->index(5, 0), i18n("DoF is %1", df), Qt::DisplayRole); resultModel->setData(resultModel->index(6, 0), i18n("Significance level is %1", m_significance_level), Qt::DisplayRole); if (p_value <= m_significance_level) - temp = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); + temp_msg = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); else - temp = i18n("There is a plausibility for Null Hypothesis to be true"); + temp_msg = i18n("There is a plausibility for Null Hypothesis to be true"); // tool tips - resultModel->setData(resultModel->index(4, 0), temp, Qt::ToolTipRole); + resultModel->setData(resultModel->index(4, 0), temp_msg, Qt::ToolTipRole); // resultModel->setData(resultModel->index(0, 0), QIcon("open.xpm"), Qt::DecorationRole); emit q->changed(); return; } else if (test == TestZ) { m_currTestName = i18n("One Sample Z Test for %1", m_columns[0]->name()); double z; int df; - QString temp = ""; + QString temp_msg = ""; double p_value = 0; resultModel->setRowCount(8); resultModel->setColumnCount(1); z = (mean - m_population_mean) / (std/qSqrt(n)); df = n - 1; switch (tail_type) { case HypothesisTest::TailNegative: p_value = nsl_stats_tdist_p(z, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2265), m_population_mean); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2265), m_population_mean); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3C), m_population_mean); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3C), m_population_mean); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailPositive: z *= -1; p_value = nsl_stats_tdist_p(z, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2264), m_population_mean); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2264), m_population_mean); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3E), m_population_mean); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3E), m_population_mean); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; case HypothesisTest::TailTwo: p_value = nsl_stats_tdist_p(z, df) + nsl_stats_tdist_p(-1*z, df); - temp = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3D), m_population_mean); - resultModel->setData(resultModel->index(0, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Null Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x3D), m_population_mean); + resultModel->setData(resultModel->index(0, 0), temp_msg, Qt::DisplayRole); - temp = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2260), m_population_mean); - resultModel->setData(resultModel->index(1, 0), temp, Qt::DisplayRole); + temp_msg = i18n("Alternate Hypothesis : Population mean of %1 %2 %3", m_columns[0]->name(), QChar(0x2260), m_population_mean); + resultModel->setData(resultModel->index(1, 0), temp_msg, Qt::DisplayRole); break; } resultModel->setData(resultModel->index(3, 0), i18n("Z value is %1", z), Qt::DisplayRole); resultModel->setData(resultModel->index(4, 0), i18n("P value is %1", p_value), Qt::DisplayRole); resultModel->setData(resultModel->index(5, 0), i18n("Significance level is %1", m_significance_level), Qt::DisplayRole); resultModel->setData(resultModel->index(7, 0), i18n("Assumption: Central Limit Theorem is Valid"), Qt::DisplayRole); if (p_value <= m_significance_level) - temp = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); + temp_msg = i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level); else - temp = i18n("There is a plausibility for Null Hypothesis to be true"); + temp_msg = i18n("There is a plausibility for Null Hypothesis to be true"); // tool tips - resultModel->setData(resultModel->index(4, 0), temp, Qt::ToolTipRole); + resultModel->setData(resultModel->index(4, 0), temp_msg, Qt::ToolTipRole); // resultModel->setData(resultModel->index(0, 0), QIcon("open.xpm"), Qt::DecorationRole); emit q->changed(); return; } } void HypothesisTestPrivate::findStatsPaired(Column* column1, Column* column2, int &count, double &sum, double &mean, double &std) { sum = 0; mean = 0; std = 0; int count1 = column1->rowCount(); int count2 = column2->rowCount(); count = qMin(count1, count2); double row1, row2; for (int i = 0; i < count; i++) { row1 = column1->valueAt(i); row2 = column2->valueAt(i); if (std::isnan(row1) || std::isnan(row2)) { if (std::isnan(row1) && std::isnan(row2)) count = i; else { count = -1; return; } break; } sum += row1 - row2; } if (count < 1) return; mean = sum/count; double row; for (int i = 0; i < count; i++) { row1 = column1->valueAt(i); row2 = column2->valueAt(i); row = row1 - row2; std += qPow( (row - mean), 2); } if (count > 1) std = std / (count-1); std = qSqrt(std); return; } void HypothesisTestPrivate::findStats(Column* column, int &count, double &sum, double &mean, double &std) { sum = 0; mean = 0; std = 0; count = column->rowCount(); for (int i = 0; i < count; i++) { double row = column->valueAt(i); if ( std::isnan(row)) { count = i; break; } sum += row; } if (count < 1) return; mean = sum/count; for (int i = 0; i < count; i++) { double row = column->valueAt(i); std += qPow( (row - mean), 2); } if (count > 1) std = std / (count-1); std = qSqrt(std); return; } void HypothesisTestPrivate::findStatsCategorical(int n[], double sum[], double mean[], double std[], QString &col1_name, QString &col2_name) { /* Error codes; n[0] = -1 : unequal rows n[0] = -2 : #categorical variables != 2; */ // clearing the variables; for (int i = 0; i < 2; i++) { sum[i] = 0; mean[i] = 0; std[i] = 0; n[i] = 0; } int count_temp = m_columns[0]->rowCount(); col1_name = ""; col2_name = ""; for (int i = 0; i < count_temp; i++) { - QString row1 = m_columns[0]->textAt(i); - double row2 = m_columns[1]->valueAt(i); + QString name = m_columns[0]->textAt(i); + double value = m_columns[1]->valueAt(i); - if (row1 == "" || std::isnan(row2)) { - if (row1 == "" && std::isnan(row2)) { + if (name == "" || std::isnan(value)) { + if (name == "" && std::isnan(value)) { break; } else { n[0] = -1; return; } } - if (row1 == col1_name) { + if (name == col1_name) { n[0]++; - sum[0] += row2; - } else if (row1 == col2_name) { + sum[0] += value; + } else if (name == col2_name) { n[1]++; - sum[1] += row2; + sum[1] += value; } else if (col1_name == "") { n[0]++; - sum[0] += row2; - col1_name = row1; + sum[0] += value; + col1_name = name; } else if (col2_name == "") { n[1]++; - sum[1] += row2; - col2_name = row1; + sum[1] += value; + col2_name = name; } else { // this case occurs when there are more than two categorical variables in column 1 // sending error code of -1; n[0] = -2; return; } } if (col1_name == "" || col2_name == "") { n[0] = -2; return; } mean[0] = sum[0]/n[0]; mean[1] = sum[1]/n[1]; for (int i = 0; i < n[0]+n[1]; i++) { - QString row1 = m_columns[0]->textAt(i); - double row2 = m_columns[1]->valueAt(i); + QString name = m_columns[0]->textAt(i); + double value = m_columns[1]->valueAt(i); - if (row1 == col1_name) { - std[0] += qPow( (row2 - mean[0]), 2); + if (name == col1_name) { + std[0] += qPow( (value - mean[0]), 2); } else { - std[1] += qPow( (row2 - mean[1]), 2); + std[1] += qPow( (value - mean[1]), 2); } } for (int i = 0; i < 2; i++) { if (n[i] > 1) std[i] = std[i] / (n[i] - 1); std[i] = qSqrt(std[i]); } return; } QString HypothesisTestPrivate::getHtmlTable(int row, int column, QVariant *row_major) { if (row < 1 || column < 1) return QString(); QString table = ""; table = "" "" " "; QString bg = "tg-0pky"; bool pky = true; QString element; table += " "; for (int j = 0; j < column; j++) { element = row_major[j].toString(); table += i18n(" ", bg, element); } table += " "; if (pky) bg = "tg-0pky"; else bg = "tg-btxf"; pky = !pky; for (int i = 1; i < row; i++) { table += " "; QString element = row_major[i*column].toString(); table += i18n(" ", bg, element); for (int j = 1; j < column; j++) { QString element = row_major[i*column+j].toString(); table += i18n(" ", bg, element); } table += " "; if (pky) bg = "tg-0pky"; else bg = "tg-btxf"; pky = !pky; } table += "
%2
%2%2
"; return table; } /********************************************************************************** * virtual functions implementations * ********************************************************************************/ /*! Saves as XML. */ void HypothesisTest::save(QXmlStreamWriter* writer) const { writer->writeStartElement("hypothesisTest"); writeBasicAttributes(writer); writeCommentElement(writer); //TODO: writer->writeEndElement(); } /*! Loads from XML. */ bool HypothesisTest::load(XmlStreamReader* reader, bool preview) { Q_UNUSED(preview); if (!readBasicAttributes(reader)) return false; //TODO: return !reader->hasError(); } Spreadsheet *HypothesisTest::dataSourceSpreadsheet() const { return d->dataSourceSpreadsheet; } bool HypothesisTest::exportView() const { return true; } bool HypothesisTest::printView() { return true; } bool HypothesisTest::printPreview() const { return true; } /*! Constructs a primary view on me. This method may be called multiple times during the life time of an Aspect, or it might not get called at all. Aspects must not depend on the existence of a view for their operation. */ QWidget* HypothesisTest::view() const { if (!m_partView) { m_view = new HypothesisTestView(const_cast(this)); m_partView = m_view; } return m_partView; } /*! Returns a new context menu. The caller takes ownership of the menu. */ QMenu* HypothesisTest::createContextMenu() { QMenu* menu = AbstractPart::createContextMenu(); // Q_ASSERT(menu); // emit requestProjectContextMenu(menu); return menu; }