diff --git a/src/backend/hypothesis_test/HypothesisTest.cpp b/src/backend/hypothesis_test/HypothesisTest.cpp index cdf84acc3..c101b9e56 100644 --- a/src/backend/hypothesis_test/HypothesisTest.cpp +++ b/src/backend/hypothesis_test/HypothesisTest.cpp @@ -1,926 +1,940 @@ /*************************************************************************** File : HypothesisTest.cpp Project : LabPlot Description : Doing Hypothesis-Test on data provided -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "HypothesisTest.h" #include "HypothesisTestPrivate.h" #include "kdefrontend/hypothesis_test/HypothesisTestView.h" #include "backend/spreadsheet/Spreadsheet.h" #include "backend/core/column/Column.h" #include "backend/lib/macros.h" #include "QDebug" extern "C" { #include "backend/nsl/nsl_stats.h" } #include #include #include #include #include #include #include #include HypothesisTest::HypothesisTest(const QString &name) : AbstractPart(name), d(new HypothesisTestPrivate(this)) { } HypothesisTest::~HypothesisTest() { delete d; } void HypothesisTest::setDataSourceType(DataSourceType type) { if (type != d->dataSourceType) { d->dataSourceType = type; } } HypothesisTest::DataSourceType HypothesisTest::dataSourceType() const { return d->dataSourceType; } void HypothesisTest::setDataSourceSpreadsheet(Spreadsheet *spreadsheet) { if (spreadsheet != d->dataSourceSpreadsheet) d->setDataSourceSpreadsheet(spreadsheet); } void HypothesisTest::setColumns(QVector cols) { d->m_columns = cols; } void HypothesisTest::setColumns(QStringList cols) { return d->setColumns(cols); } QStringList HypothesisTest::allColumns() { return d->all_columns; } void HypothesisTest::setTailType(HypothesisTest::TailType tailType) { d->tail_type = tailType; } HypothesisTest::TailType HypothesisTest::tailType() { return d->tail_type; } void HypothesisTest::setPopulationMean(QVariant populationMean) { d->m_population_mean = populationMean.toDouble(); } void HypothesisTest::setSignificanceLevel(QVariant alpha) { d->m_significance_level = alpha.toDouble(); } QString HypothesisTest::testName() { return d->m_currTestName; } QString HypothesisTest::statsTable() { return d->m_stats_table; } void 
HypothesisTest::performTwoSampleIndependentTTest(bool categorical_variable, bool equal_variance) { d->m_currTestName = "

Two Sample Independent T Test

"; d->performTwoSampleIndependentTest(HypothesisTestPrivate::TestT, categorical_variable, equal_variance); } void HypothesisTest::performTwoSamplePairedTTest() { d->m_currTestName = "

Two Sample Paried T Test

"; d->performTwoSamplePairedTest(HypothesisTestPrivate::TestT); } void HypothesisTest::performOneSampleTTest() { d->m_currTestName = "

One Sample T Test

"; d->performOneSampleTest(HypothesisTestPrivate::TestT); } void HypothesisTest::performTwoSampleIndependentZTest() { d->m_currTestName = "

Two Sample Independent Z Test

"; d->performTwoSampleIndependentTest(HypothesisTestPrivate::TestZ); } void HypothesisTest::performTwoSamplePairedZTest() { d->m_currTestName = "

Two Sample Paired Z Test

"; d->performTwoSamplePairedTest(HypothesisTestPrivate::TestZ); } void HypothesisTest::performOneSampleZTest() { d->m_currTestName = "

One Sample Z Test

"; d->performOneSampleTest(HypothesisTestPrivate::TestZ); } void HypothesisTest::performLeveneTest(bool categorical_variable) { d->m_currTestName = "

Levene Test for Equality of Variance

"; d->performLeveneTest(categorical_variable); } /****************************************************************************** * Private Implementations * ****************************************************************************/ HypothesisTestPrivate::HypothesisTestPrivate(HypothesisTest* owner) : q(owner) { } HypothesisTestPrivate::~HypothesisTestPrivate() { } void HypothesisTestPrivate::setDataSourceSpreadsheet(Spreadsheet *spreadsheet) { dataSourceSpreadsheet = spreadsheet; //setting rows and columns count; m_rowCount = dataSourceSpreadsheet->rowCount(); m_columnCount = dataSourceSpreadsheet->columnCount(); for (auto* col : dataSourceSpreadsheet->children()) { all_columns << col->name(); } } void HypothesisTestPrivate::setColumns(QStringList cols) { m_columns.clear(); Column* column = new Column("column"); for (QString col : cols) { if (!cols.isEmpty()) { column = dataSourceSpreadsheet->column(col); m_columns.append(column); } } } /**************************Two Sample Independent *************************************/ void HypothesisTestPrivate::performTwoSampleIndependentTest(TestType test,bool categorical_variable, bool equal_variance) { QString test_name; double value; int df = 0; double p_value = 0; clearGlobalVariables(); if (m_columns.size() != 2) { printError("Inappropriate number of columns selected"); emit q->changed(); return; } int n[2]; double sum[2], mean[2], std[2]; QString col1_name = m_columns[0]->name(); QString col2_name = m_columns[1]->name(); if (!categorical_variable && (m_columns[0]->columnMode() == AbstractColumn::Integer || m_columns[0]->columnMode() == AbstractColumn::Numeric)) { for (int i = 0; i < 2; i++) { findStats(m_columns[i], n[i], sum[i], mean[i], std[i]); if (n[i] < 1) { printError("At least one of selected column is empty"); emit q->changed(); return; } } } else { QMap col_name; int np; int total_rows; countPartitions(m_columns[0], np, total_rows); if (np != 2) { printError( i18n("Number of Categorical Variable in Column 
%1 is not equal to 2", m_columns[0]->name())); emit q->changed(); return; } ErrorType error_code = findStatsCategorical(m_columns[0], m_columns[1], n, sum, mean, std, col_name, np, total_rows); switch (error_code) { case ErrorUnqualSize: { printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name())); emit q->changed(); return; }case ErrorEmptyColumn: { printError("At least one of selected column is empty"); emit q->changed(); return; } case NoError: break; } QMapIterator i(col_name); while (i.hasNext()) { i.next(); if (i.value() == 1) col1_name = i.key(); else col2_name = i.key(); } } QVariant row_major[] = {"", "N", "Sum", "Mean", "Std", col1_name, n[0], sum[0], mean[0], std[0], col2_name, n[1], sum[1], mean[1], std[1]}; m_stats_table = getHtmlTable(3, 5, row_major); switch (test) { case TestT: { test_name = "T"; if (equal_variance) { df = n[0] + n[1] - 2; double sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); value = (mean[0] - mean[1])/(sp*qSqrt(1.0/n[0] + 1.0/n[1])); printLine(9, "Assumption: Equal Variance b/w both population means"); } else { double temp_val; temp_val = qPow( qPow(std[0], 2)/n[0] + qPow(std[1], 2)/n[1], 2); temp_val = temp_val / ( (qPow( (qPow(std[0], 2)/n[0]), 2)/(n[0]-1)) + (qPow( (qPow(std[1], 2)/n[1]), 2)/(n[1]-1))); df = qRound(temp_val); value = (mean[0] - mean[1]) / (qSqrt( (qPow(std[0], 2)/n[0]) + (qPow(std[1], 2)/n[1]))); printLine(9, "Assumption: UnEqual Variance b/w both population means"); } break; } case TestZ: { test_name = "Z"; df = n[0] + n[1] - 2; double sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); value = (mean[0] - mean[1])/(sp*qSqrt(1.0/n[0] + 1.0/n[1])); } } m_currTestName = i18n("

Two Sample Independent %1 Test for %2 vs %3

", test_name, col1_name, col2_name); p_value = getPValue(test, value, col1_name, col2_name, df); printLine(2, i18n("Significance level is %1", m_significance_level), "blue"); printLine(4, i18n("%1 Value is %2 ", test_name, value), "green"); printLine(5, i18n("P Value is %1 ", p_value), "green"); printLine(6, i18n("Degree of Freedom is %1", df), "green"); if (p_value <= m_significance_level) q->m_view->setResultLine(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole); else q->m_view->setResultLine(5, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole); emit q->changed(); return; } /********************************Two Sample Paired ***************************************/ void HypothesisTestPrivate::performTwoSamplePairedTest(TestType test) { QString test_name; int n; double sum, mean, std; double value; int df = 0; double p_value = 0; clearGlobalVariables(); if (m_columns.size() != 2) { printError("Inappropriate number of columns selected"); emit q->changed(); return; } for (int i = 0; i < 2; i++) { if (!(m_columns[i]->columnMode() == AbstractColumn::Numeric || m_columns[i]->columnMode() == AbstractColumn::Integer)) { printError("select only columns with numbers"); emit q->changed(); return; } } ErrorType error_code = findStatsPaired(m_columns[0], m_columns[1], n, sum, mean, std); switch (error_code) { case ErrorUnqualSize: { printError("both columns are having different sizes"); emit q->changed(); return; } case ErrorEmptyColumn: { printError("columns are empty"); emit q->changed(); return; } case NoError: break; default: emit q->changed(); return; } if (n == -1) { printError("both columns are having different sizes"); emit q->changed(); return; } if (n < 1) { printError("columns are empty"); emit q->changed(); return; } QVariant row_major[] = {"", "N", "Sum", "Mean", "Std", "difference", n, sum, mean, std}; m_stats_table = getHtmlTable(2, 5, row_major); switch (test) { case 
TestT: { value = mean / (std/qSqrt(n)); df = n - 1; test_name = "T"; printLine(6, i18n("Degree of Freedom is %1name(), i18n("%1",m_population_mean), df); m_currTestName = i18n("

One Sample %1 Test for %2 vs %3

", test_name, m_columns[0]->name(), m_columns[1]->name()); printLine(2, i18n("Significance level is %1 ", m_significance_level), "blue"); printLine(4, i18n("%1 Value is %2 ", test_name, value), "green"); printLine(5, i18n("P Value is %1 ", p_value), "green"); if (p_value <= m_significance_level) q->m_view->setResultLine(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole); else q->m_view->setResultLine(5, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole); emit q->changed(); return; } /******************************** One Sample ***************************************/ void HypothesisTestPrivate::performOneSampleTest(TestType test) { QString test_name; double value; int df = 0; double p_value = 0; clearGlobalVariables(); if (m_columns.size() != 1) { printError("Inappropriate number of columns selected"); emit q->changed(); return; } if ( !(m_columns[0]->columnMode() == AbstractColumn::Numeric || m_columns[0]->columnMode() == AbstractColumn::Integer)) { printError("select only columns with numbers"); emit q->changed(); return; } int n; double sum, mean, std; ErrorType error_code = findStats(m_columns[0], n, sum, mean, std); switch (error_code) { case ErrorUnqualSize: { printError("column is empty"); emit q->changed(); return; } case NoError: break; default: { emit q->changed(); return; } } QVariant row_major[] = {"", "N", "Sum", "Mean", "Std", m_columns[0]->name(), n, sum, mean, std}; m_stats_table = getHtmlTable(2, 5, row_major); switch (test) { case TestT: { test_name = "T"; value = (mean - m_population_mean) / (std/qSqrt(n)); df = n - 1; printLine(6, i18n("Degree of Freedom is %1", df), "blue"); break; } case TestZ: { test_name = "Z"; df = 0; value = (mean - m_population_mean) / (std/qSqrt(n)); }} p_value = getPValue(test, value, m_columns[0]->name(), i18n("%1",m_population_mean), df); m_currTestName = i18n("

One Sample %1 Test for %2

", test_name, m_columns[0]->name()); printLine(2, i18n("Significance level is %1", m_significance_level), "blue"); printLine(4, i18n("%1 Value is %2", test_name, value), "green"); printLine(5, i18n("P Value is %1", p_value), "green"); if (p_value <= m_significance_level) q->m_view->setResultLine(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole); else q->m_view->setResultLine(5, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole); emit q->changed(); return; } /**************************************Levene Test****************************************/ void HypothesisTestPrivate::performLeveneTest(bool categorical_variable) { QString test_name; double f_value; int df = 0; // degree of freedom double p_value = 0; int np = 0; // number of partitions int total_rows = 0; int total_count = 0; clearGlobalVariables(); if (m_columns.size() != 2) { printError("Inappropriate number of columns selected"); emit q->changed(); return; } if (!categorical_variable && (m_columns[0]->columnMode() == AbstractColumn::Integer || m_columns[0]->columnMode() == AbstractColumn::Numeric)) np = m_columns.size(); else countPartitions(m_columns[0], np, total_rows); - int *n = new int[np]; + int *n = new int[np]; double* sum = new double[np]; double* mean = new double[np]; double* std = new double[np]; QString* col_names = new QString[np]; if (!categorical_variable && (m_columns[0]->columnMode() == AbstractColumn::Integer || m_columns[0]->columnMode() == AbstractColumn::Numeric)) { for (int i = 0; i < np; i++) { findStats(m_columns[i], n[i], sum[i], mean[i], std[i]); total_count += n[i]; if (n[i] < 1) { printError("At least one of selected column is empty"); emit q->changed(); return; } col_names[i] = m_columns[i]->name(); } } -// else { -// QMap col_name_to_partition; -// ErrorType error_code = findStatsCategorical(m_columns[0], m_columns[1], n, sum, mean, std, col_name_to_partition, np, total_rows); -// 
switch (error_code) { -// case ErrorUnqualSize: { -// printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name())); -// emit q->changed(); -// return; -// }case ErrorEmptyColumn: { -// printError("At least one of selected column is empty"); -// emit q->changed(); -// return; -// } case NoError: -// break; -// } - -// QMapIterator i(col_name_to_partition); -// while (i.hasNext()) { -// i.next(); -// col_names[i.value()] = i.key(); -// } -// } + else { + QMap col_name_to_partition; + ErrorType error_code = findStatsCategorical(m_columns[0], m_columns[1], n, sum, mean, std, col_name_to_partition, np, total_rows); + switch (error_code) { + case ErrorUnqualSize: { + printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name())); + emit q->changed(); + return; + }case ErrorEmptyColumn: { + printError("At least one of selected column is empty"); + emit q->changed(); + return; + } case NoError: + break; + } + + QMapIterator i(col_name_to_partition); + while (i.hasNext()) { + i.next(); + col_names[i.value()-1] = i.key(); + } + } int row_count = np+1; int column_count = 5; + qDebug() << " row count is " << row_count; + QVariant* row_major = new QVariant[row_count*column_count]; // header data; row_major[0] = ""; row_major[1] = "N"; row_major[2] = "Sum"; row_major[3] = "Mean"; row_major[4] = "Std"; // table data for (int row_i = 1; row_i < row_count; row_i++) { row_major[row_i*column_count] = col_names[row_i-1]; row_major[row_i*column_count + 1] = n[row_i-1]; row_major[row_i*column_count + 2] = sum[row_i-1]; row_major[row_i*column_count + 3] = mean[row_i-1]; row_major[row_i*column_count + 4] = std[row_i-1]; } m_stats_table = getHtmlTable(row_count, column_count, row_major); //// switch (test) { //// case TestT: { //// test_name = "T"; //// if (equal_variance) { //// df = n[0] + n[1] - 2; //// double sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); //// value = 
(mean[0] - mean[1])/(sp*qSqrt(1.0/n[0] + 1.0/n[1])); //// printLine(9, "Assumption: Equal Variance b/w both population means"); //// } else { //// double temp_val; //// temp_val = qPow( qPow(std[0], 2)/n[0] + qPow(std[1], 2)/n[1], 2); //// temp_val = temp_val / ( (qPow( (qPow(std[0], 2)/n[0]), 2)/(n[0]-1)) + (qPow( (qPow(std[1], 2)/n[1]), 2)/(n[1]-1))); //// df = qRound(temp_val); //// value = (mean[0] - mean[1]) / (qSqrt( (qPow(std[0], 2)/n[0]) + (qPow(std[1], 2)/n[1]))); //// printLine(9, "Assumption: UnEqual Variance b/w both population means"); //// } //// break; //// } case TestZ: { //// test_name = "Z"; //// df = n[0] + n[1] - 2; //// double sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); //// value = (mean[0] - mean[1])/(sp*qSqrt(1.0/n[0] + 1.0/n[1])); //// } //// } //// m_currTestName = i18n("

Two Sample Independent %1 Test for %2 vs %3

", test_name, col1_name, col2_name); //// p_value = getPValue(test, value, col1_name, col2_name, df); //// printLine(2, i18n("Significance level is %1", m_significance_level), "blue"); //// printLine(4, i18n("%1 Value is %2 ", test_name, value), "green"); //// printLine(5, i18n("P Value is %1 ", p_value), "green"); //// printLine(6, i18n("Degree of Freedom is %1", df), "green"); //// if (p_value <= m_significance_level) //// q->m_view->setResultLine(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole); //// else //// q->m_view->setResultLine(5, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole); emit q->changed(); return; } /***************************************Helper Functions*************************************/ HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStats(const Column* column, int &count, double &sum, double &mean, double &std) { sum = 0; mean = 0; std = 0; count = column->rowCount(); for (int i = 0; i < count; i++) { double row = column->valueAt(i); if ( std::isnan(row)) { count = i; break; } sum += row; } if (count < 1) return HypothesisTestPrivate::ErrorEmptyColumn; mean = sum/count; for (int i = 0; i < count; i++) { double row = column->valueAt(i); std += qPow( (row - mean), 2); } if (count > 1) std = std / (count-1); std = qSqrt(std); return HypothesisTestPrivate::NoError; } HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStatsPaired(const Column* column1, const Column* column2, int &count, double &sum, double &mean, double &std) { sum = 0; mean = 0; std = 0; int count1 = column1->rowCount(); int count2 = column2->rowCount(); count = qMin(count1, count2); double cell1, cell2; for (int i = 0; i < count; i++) { cell1 = column1->valueAt(i); cell2 = column2->valueAt(i); if (std::isnan(cell1) || std::isnan(cell2)) { if (std::isnan(cell1) && std::isnan(cell2)) count = i; else return HypothesisTestPrivate::ErrorUnqualSize; break; } sum += 
cell1 - cell2; } if (count < 1) return HypothesisTestPrivate::ErrorEmptyColumn; mean = sum/count; double row; for (int i = 0; i < count; i++) { cell1 = column1->valueAt(i); cell2 = column2->valueAt(i); row = cell1 - cell2; std += qPow( (row - mean), 2); } if (count > 1) std = std / (count-1); std = qSqrt(std); return HypothesisTestPrivate::NoError; } -void HypothesisTestPrivate::countPartitions(const Column *column, int &np, int &total_rows) { +void HypothesisTestPrivate::countPartitions(Column *column, int &np, int &total_rows) { total_rows = column->rowCount(); np = 0; QString cell_value; QMap discovered_categorical_var; + + AbstractColumn::ColumnMode original_col_mode = column->columnMode(); + column->setColumnMode(AbstractColumn::Text); + for (int i = 0; i < total_rows; i++) { - cell_value = m_columns[0]->textAt(i); + cell_value = column->textAt(i); + if (cell_value.isEmpty()) { total_rows = i; break; } if (discovered_categorical_var[cell_value]) continue; discovered_categorical_var[cell_value] = true; np++; } + column->setColumnMode(original_col_mode); } -HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStatsCategorical(const Column *column1, const Column *column2, int n[], double sum[], double mean[], double std[], QMap &col_name, const int &np, const int &total_rows) { - const Column* columns[] = {column1, column2}; +HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStatsCategorical(Column *column1, Column *column2, int n[], double sum[], double mean[], double std[], QMap &col_name, const int &np, const int &total_rows) { + Column* columns[] = {column1, column2}; for (int i = 0; i < np; i++) { n[i] = 0; sum[i] = 0; mean[i] = 0; std[i] = 0; } + AbstractColumn::ColumnMode original_col_mode = columns[0]->columnMode(); + columns[0]->setColumnMode(AbstractColumn::Text); int partition_number = 1; for (int i = 0; i < total_rows; i++) { QString name = columns[0]->textAt(i); + + name = columns[0]->textAt(i); double value = columns[1]->valueAt(i); 
if (std::isnan(value)) { + columns[0]->setColumnMode(original_col_mode); return HypothesisTestPrivate::ErrorUnqualSize; } if (col_name[name] == 0) { col_name[name] = partition_number; partition_number++; } n[col_name[name]-1]++; sum[col_name[name]-1] += value; } for (int i = 0; i < np; i++) mean[i] = sum[i] / n[i]; for (int i = 0; i < total_rows; i++) { QString name = columns[0]->textAt(i); double value = columns[1]->valueAt(i); std[col_name[name]-1] += qPow( (value - mean[col_name[name]-1]), 2); } for (int i = 0; i < np; i++) { if (n[i] > 1) std[i] = std[i] / (n[i] - 1); std[i] = qSqrt(std[i]); } + columns[0]->setColumnMode(original_col_mode); return HypothesisTestPrivate::NoError; } double HypothesisTestPrivate::getPValue(const HypothesisTestPrivate::TestType &test, double &value, const QString &col1_name, const QString &col2_name, const int df) { double p_value = 0; //TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol; switch (test) { case TestT: { switch (tail_type) { case HypothesisTest::TailNegative: p_value = nsl_stats_tdist_p(value, df); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("≥"), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("⋖"), col2_name), "blue"); break; case HypothesisTest::TailPositive: value *= -1; p_value = nsl_stats_tdist_p(value, df); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("≤"), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING(">"), col2_name), "blue"); break; case HypothesisTest::TailTwo: p_value = nsl_stats_tdist_p(value, df) + nsl_stats_tdist_p(-1*value, df); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("="), 
col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("≠"), col2_name), "blue"); break; } break; } case TestZ: { switch (tail_type) { case HypothesisTest::TailNegative: p_value = nsl_stats_tdist_p(value, df); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("≥"), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("⋖"), col2_name), "blue"); break; case HypothesisTest::TailPositive: value *= -1; p_value = nsl_stats_tdist_p(value, df); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("≤"), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING(">"), col2_name), "blue"); break; case HypothesisTest::TailTwo: p_value = nsl_stats_tdist_p(value, df) + nsl_stats_tdist_p(-1*value, df); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("="), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("≠"), col2_name), "blue"); break; } break; } } if (p_value > 1) return 1; return p_value; } QString HypothesisTestPrivate::getHtmlTable(int row, int column, QVariant *row_major) { if (row < 1 || column < 1) return QString(); QString table = ""; table = "" "" " "; QString bg = "tg-0pky"; bool pky = true; QString element; table += " "; for (int j = 0; j < column; j++) { element = row_major[j].toString(); table += i18n(" ", bg, element); } table += " "; if (pky) bg = "tg-0pky"; else bg = "tg-btxf"; pky = !pky; for (int i = 1; i < row; i++) { table += " "; QString element = row_major[i*column].toString(); table += i18n(" ", bg, element); for (int j = 1; j < column; j++) { QString element 
= row_major[i*column+j].toString(); table += i18n(" ", bg, element); } table += " "; if (pky) bg = "tg-0pky"; else bg = "tg-btxf"; pky = !pky; } table += "
%2
%2%2
"; return table; } void HypothesisTestPrivate::printLine(const int &index, const QString &msg, const QString &color) { q->m_view->setResultLine(index, i18n("

%2

", color, msg)); return; } void HypothesisTestPrivate::printError(const QString &error_msg) { printLine(0, error_msg, "red"); emit q->changed(); } void HypothesisTestPrivate::clearGlobalVariables() { m_stats_table = ""; q->m_view->clearResult(); } /********************************************************************************** * virtual functions implementations * ********************************************************************************/ /*! Saves as XML. */ void HypothesisTest::save(QXmlStreamWriter* writer) const { writer->writeStartElement("hypothesisTest"); writeBasicAttributes(writer); writeCommentElement(writer); //TODO: writer->writeEndElement(); } /*! Loads from XML. */ bool HypothesisTest::load(XmlStreamReader* reader, bool preview) { Q_UNUSED(preview); if (!readBasicAttributes(reader)) return false; //TODO: return !reader->hasError(); } Spreadsheet *HypothesisTest::dataSourceSpreadsheet() const { return d->dataSourceSpreadsheet; } bool HypothesisTest::exportView() const { return true; } bool HypothesisTest::printView() { return true; } bool HypothesisTest::printPreview() const { return true; } /*! Constructs a primary view on me. This method may be called multiple times during the life time of an Aspect, or it might not get called at all. Aspects must not depend on the existence of a view for their operation. */ QWidget* HypothesisTest::view() const { if (!m_partView) { m_view = new HypothesisTestView(const_cast(this)); m_partView = m_view; } return m_partView; } /*! Returns a new context menu. The caller takes ownership of the menu. 
*/ QMenu* HypothesisTest::createContextMenu() { QMenu* menu = AbstractPart::createContextMenu(); // Q_ASSERT(menu); // emit requestProjectContextMenu(menu); return menu; } diff --git a/src/backend/hypothesis_test/HypothesisTestPrivate.h b/src/backend/hypothesis_test/HypothesisTestPrivate.h index 4ebd097e1..6f16decc0 100644 --- a/src/backend/hypothesis_test/HypothesisTestPrivate.h +++ b/src/backend/hypothesis_test/HypothesisTestPrivate.h @@ -1,84 +1,84 @@ /*************************************************************************** File : HypothesisTestPrivate.h Project : LabPlot Description : Private members of Hypothesis Test -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef HYPOTHESISTESTPRIVATE_H #define HYPOTHESISTESTPRIVATE_H #include class QStandardItemModel; class HypothesisTestPrivate { public: explicit HypothesisTestPrivate(HypothesisTest*); virtual ~HypothesisTestPrivate(); enum TestType {TestT, TestZ}; enum ErrorType {ErrorUnqualSize, ErrorEmptyColumn, NoError}; QString name() const; void setDataSourceSpreadsheet(Spreadsheet* spreadsheet); void setColumns(QStringList cols); void performTwoSampleIndependentTest(TestType test, bool categorical_variable = false, bool equal_variance = true); void performTwoSamplePairedTest(TestType test); void performOneSampleTest(TestType test); void performLeveneTest(bool categorical_variable); HypothesisTest* const q; HypothesisTest::DataSourceType dataSourceType{HypothesisTest::DataSourceSpreadsheet}; Spreadsheet* dataSourceSpreadsheet{nullptr}; QVector m_columns; QStringList all_columns; bool m_dbCreated{false}; int m_rowCount{0}; int m_columnCount{0}; QString m_currTestName{"Result Table"}; double m_population_mean; double m_significance_level; QString m_stats_table; HypothesisTest::TailType tail_type; private: - void countPartitions(const Column* column, int &np, int &total_rows); + void countPartitions(Column* column, int &np, int &total_rows); ErrorType findStats(const Column* column,int &count, double &sum, double &mean, double &std); ErrorType findStatsPaired(const Column *column1, const Column *column2, int &count, double &sum, double &mean, double &std); - ErrorType findStatsCategorical(const Column *column1, const Column *column2, int n[], double sum[], double mean[], double std[], QMap &col_name, const int &np, const int &total_rows); + ErrorType findStatsCategorical(Column 
*column1, Column *column2, int n[], double sum[], double mean[], double std[], QMap &col_name, const int &np, const int &total_rows); double getPValue(const TestType &test, double &value, const QString &col1_name, const QString &col2_name, const int df); QString getHtmlTable(int row, int column, QVariant *row_major); void printLine(const int &index, const QString &msg, const QString &color = "black"); void printError(const QString &error_msg); void clearGlobalVariables(); }; #endif diff --git a/src/kdefrontend/hypothesis_test/HypothesisTestView.cpp b/src/kdefrontend/hypothesis_test/HypothesisTestView.cpp index 71a3b7342..55516969f 100644 --- a/src/kdefrontend/hypothesis_test/HypothesisTestView.cpp +++ b/src/kdefrontend/hypothesis_test/HypothesisTestView.cpp @@ -1,198 +1,200 @@ /*************************************************************************** File : HypothesisTestView.cpp Project : LabPlot Description : View class for Hypothesis Tests' Table -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "HypothesisTestView.h" #include "backend/hypothesis_test/HypothesisTest.h" #include "backend/lib/macros.h" #include "backend/lib/trace.h" #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include /*! \class HypothesisTestView \brief View class for Hypothesis Test \ingroup kdefrontend */ /*! Builds the view: a QLabel for the test name, a read-only QTextEdit for the statistics table (a QLabel before this patch) and a container widget for the result lines, stacked in a QVBoxLayout; then calls init(). */ HypothesisTestView::HypothesisTestView(HypothesisTest* hypothesisTest) : QWidget(), m_hypothesisTest(hypothesisTest), m_testName(new QLabel()), - m_statsTable(new QLabel()), + m_statsTable(new QTextEdit()), m_summaryResults(new QWidget()){ - m_summaryResults->setMouseTracking(true); + m_statsTable->setReadOnly(true); + auto* layout = new QVBoxLayout(this); layout->addWidget(m_testName); layout->addWidget(m_statsTable); layout->addWidget(m_summaryResults); /* NOTE(review): m_summaryResults is added to the same layout a second time by the next statement; this looks like an accidental duplicate. */ layout->addWidget(m_summaryResults); init(); } HypothesisTestView::~HypothesisTestView() = default; /*! Calls the currently empty initActions() and initMenus(), creates the 10 result-line QLabels inside m_summaryResults and connects HypothesisTest::changed to changed(). */ void HypothesisTestView::init() { initActions(); initMenus(); // m_summaryResults->setStyleSheet("background-color:white; border: 0px; margin: 0px; padding 0px;qproperty-frame: false;"); QVBoxLayout* summary_layout = new QVBoxLayout(m_summaryResults); for (int i = 0; i < 10; i++) { m_resultLine[i] = new QLabel(); summary_layout->addWidget(m_resultLine[i]); } connect(m_hypothesisTest, &HypothesisTest::changed, this, &HypothesisTestView::changed); } void HypothesisTestView::initActions() { } void HypothesisTestView::initMenus() { } /*! Sets the text (Qt::DisplayRole) or the tooltip (Qt::ToolTipRole) of result line \c index from \c data.toString(); indices outside 0..9 and any other role are ignored. */ void HypothesisTestView::setResultLine(int index, QVariant data, Qt::ItemDataRole role) { if (index < 0 || index >= 10) return; if (role == Qt::DisplayRole) m_resultLine[index]->setText(data.toString()); else if
(role == Qt::ToolTipRole) m_resultLine[index]->setToolTip(data.toString()); } /*! Clears all 10 result-line labels. */ void HypothesisTestView::clearResult() { for (int i = 0; i < 10; i++) m_resultLine[i]->clear(); } /*! Placeholder with an empty body. */ void HypothesisTestView::connectActions() { } /*! Placeholder; \c toolBar is marked unused and nothing is added to it. */ void HypothesisTestView::fillToolBar(QToolBar* toolBar) { Q_UNUSED(toolBar); } /*! * Intended to populate the menu \c menu with the hypothesis test view relevant actions; at present it only asserts that \c menu is non-null and adds no actions. * NOTE(review): the usage list below was carried over from PivotTableView and still needs confirming for this view. * The menu is used * - as the context menu in the view * - as the menu of this view in the main menu-bar (called from MainWin) * - as a part of the context menu in project explorer */ void HypothesisTestView::createContextMenu(QMenu* menu) { Q_ASSERT(menu); } /*! Export stub: performs no export and always returns true. */ bool HypothesisTestView::exportView() { return true; } /*! Shows a QPrintDialog, calls print() if it is accepted, deletes the dialog and returns the dialog result converted to bool. NOTE(review): the window title still reads "Print Spreadsheet". */ bool HypothesisTestView::printView() { QPrinter printer; auto* dlg = new QPrintDialog(&printer, this); dlg->setWindowTitle(i18nc("@title:window", "Print Spreadsheet")); bool ret; if ((ret = dlg->exec()) == QDialog::Accepted) { print(&printer); } delete dlg; return ret; } /*! Runs a QPrintPreviewDialog whose paintRequested signal is routed to print() and returns the dialog result converted to bool. NOTE(review): unlike printView(), the dialog is not deleted here; it is parented to this view, so presumably one dialog object accumulates per call until the view is destroyed. */ bool HypothesisTestView::printPreview() { QPrintPreviewDialog* dlg = new QPrintPreviewDialog(this); connect(dlg, &QPrintPreviewDialog::paintRequested, this, &HypothesisTestView::print); return dlg->exec(); } /*! prints the complete spreadsheet to \c printer. 
*/ /* NOTE(review): as written, print() only constructs a QPainter on \c printer between WAIT_CURSOR and RESET_CURSOR; nothing is drawn yet. */ void HypothesisTestView::print(QPrinter* printer) const { WAIT_CURSOR; QPainter painter (printer); RESET_CURSOR; } /*! Slot connected to HypothesisTest::changed in init(): refreshes the test-name label and loads the statistics table via setHtml() (setText() before this patch). */ void HypothesisTestView::changed() { m_testName->setText(m_hypothesisTest->testName()); - m_statsTable->setText(m_hypothesisTest->statsTable()); + m_statsTable->setHtml(m_hypothesisTest->statsTable()); } /*! Stub: opens \c path write-only with truncation and returns silently if that fails; no data is written and the other parameters are marked unused. NOTE(review): the PERFTRACE label still says "pivot table". */ void HypothesisTestView::exportToFile(const QString& path, const bool exportHeader, const QString& separator, QLocale::Language language) const { Q_UNUSED(exportHeader); Q_UNUSED(separator); Q_UNUSED(language); QFile file(path); if (!file.open(QFile::WriteOnly | QFile::Truncate)) return; PERFTRACE("export pivot table to file"); } /*! Stub with the same shape as exportToFile(): opens \c path write-only with truncation, writes nothing, and marks every option parameter unused. NOTE(review): the PERFTRACE label still says "pivot table". */ void HypothesisTestView::exportToLaTeX(const QString & path, const bool exportHeaders, const bool gridLines, const bool captions, const bool latexHeaders, const bool skipEmptyRows, const bool exportEntire) const { Q_UNUSED(exportHeaders); Q_UNUSED(gridLines); Q_UNUSED(captions); Q_UNUSED(latexHeaders); Q_UNUSED(skipEmptyRows); Q_UNUSED(exportEntire); QFile file(path); if (!file.open(QFile::WriteOnly | QFile::Truncate)) return; PERFTRACE("export pivot table to latex"); } diff --git a/src/kdefrontend/hypothesis_test/HypothesisTestView.h b/src/kdefrontend/hypothesis_test/HypothesisTestView.h index 9e54af4ab..ae26f03f1 100644 --- a/src/kdefrontend/hypothesis_test/HypothesisTestView.h +++ b/src/kdefrontend/hypothesis_test/HypothesisTestView.h @@ -1,93 +1,92 @@ /*************************************************************************** File : HypothesisTestView.h Project : LabPlot Description : View class for Hypothesis Tests' -------------------------------------------------------------------- Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public 
License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #ifndef HYPOTHESISTESTVIEW_H #define HYPOTHESISTESTVIEW_H #include #include "backend/core/AbstractColumn.h" #include "backend/lib/IntervalAttribute.h" class Column; class HypothesisTest; class HypothesisTestModel; class AbstractAspect; class QTableView; class QHeaderView; class QListView; class QPrinter; class QMenu; class QToolBar; class QModelIndex; class QItemSelection; class QLabel; class QTextEdit; -class QLineEdit; /*! \brief QWidget that displays a HypothesisTest: a test-name label, a statistics table and an array of 10 result-line labels. */ class HypothesisTestView : public QWidget { Q_OBJECT public: explicit HypothesisTestView(HypothesisTest*); ~HypothesisTestView() override; bool exportView(); bool printView(); bool printPreview(); private: void init(); void initActions(); void initMenus(); void connectActions(); void exportToFile(const QString&, const bool, const QString&, QLocale::Language) const; void exportToLaTeX(const QString&, const bool exportHeaders, const bool gridLines, const bool captions, const bool latexHeaders, const bool skipEmptyRows,const bool exportEntire) const; HypothesisTest* m_hypothesisTest; QLabel* m_testName; /* This hunk changes m_statsTable from QLabel to QTextEdit; the .cpp makes it read-only and fills it via setHtml(). */ - QLabel* m_statsTable; + QTextEdit* m_statsTable; QWidget* m_summaryResults; /* Fixed-size array of result-line labels; the .cpp uses the same literal 10 as its loop and index bound. */ QLabel* m_resultLine[10]; public slots: void createContextMenu(QMenu*); void fillToolBar(QToolBar*); void print(QPrinter*) const; void changed(); void setResultLine(int index, QVariant data, 
Qt::ItemDataRole role = Qt::DisplayRole); void clearResult(); private slots: }; #endif