diff --git a/src/backend/hypothesis_test/HypothesisTest.h b/src/backend/hypothesis_test/HypothesisTest.h --- a/src/backend/hypothesis_test/HypothesisTest.h +++ b/src/backend/hypothesis_test/HypothesisTest.h @@ -70,6 +70,7 @@ void performTwoSampleIndependentZTest(); void performTwoSamplePairedZTest(); void performOneSampleZTest(); + void performOneWayAnova(); void performLeveneTest(bool categorical_variable); //virtual methods diff --git a/src/backend/hypothesis_test/HypothesisTest.cpp b/src/backend/hypothesis_test/HypothesisTest.cpp --- a/src/backend/hypothesis_test/HypothesisTest.cpp +++ b/src/backend/hypothesis_test/HypothesisTest.cpp @@ -46,6 +46,7 @@ #include #include #include +#include HypothesisTest::HypothesisTest(const QString &name) : AbstractPart(name), d(new HypothesisTestPrivate(this)) { @@ -137,6 +138,11 @@ d->performOneSampleTest(HypothesisTestPrivate::TestZ); } +void HypothesisTest::performOneWayAnova() { + d->m_currTestName = "

One Way Anova

"; + d->performOneWayAnova(); +} + void HypothesisTest::performLeveneTest(bool categorical_variable) { d->m_currTestName = "

Levene Test for Equality of Variance

"; d->performLeveneTest(categorical_variable); @@ -146,6 +152,8 @@ * Private Implementations * ****************************************************************************/ +//TODO: round off numbers while printing + HypothesisTestPrivate::HypothesisTestPrivate(HypothesisTest* owner) : q(owner) { } @@ -185,6 +193,7 @@ double value; int df = 0; double p_value = 0; + double sp = 0; clearGlobalVariables(); if (m_columns.size() != 2) { @@ -258,7 +267,7 @@ if (equal_variance) { df = n[0] + n[1] - 2; - double sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); + sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); value = (mean[0] - mean[1])/(sp*qSqrt(1.0/n[0] + 1.0/n[1])); printLine(9, "Assumption: Equal Variance b/w both population means"); } else { @@ -273,15 +282,14 @@ break; } case TestZ: { test_name = "Z"; - df = n[0] + n[1] - 2; - - double sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); + sp = qSqrt( ((n[0]-1)*qPow(std[0],2) + (n[1]-1)*qPow(std[1],2))/df); value = (mean[0] - mean[1])/(sp*qSqrt(1.0/n[0] + 1.0/n[1])); + p_value = gsl_cdf_gaussian_P(value, sp); } } m_currTestName = i18n("

Two Sample Independent %1 Test for %2 vs %3

", test_name, col1_name, col2_name); - p_value = getPValue(test, value, col1_name, col2_name, df); + p_value = getPValue(test, value, col1_name, col2_name, (mean[0] - mean[1]), sp, df); printLine(2, i18n("Significance level is %1", m_significance_level), "blue"); printLine(4, i18n("%1 Value is %2 ", test_name, value), "green"); @@ -368,7 +376,7 @@ break; }} - p_value = getPValue(test, value, m_columns[0]->name(), i18n("%1",m_population_mean), df); + p_value = getPValue(test, value, m_columns[0]->name(), i18n("%1",m_population_mean), mean, std, df); m_currTestName = i18n("

One Sample %1 Test for %2 vs %3

", test_name, m_columns[0]->name(), m_columns[1]->name()); printLine(2, i18n("Significance level is %1 ", m_significance_level), "blue"); @@ -441,7 +449,7 @@ value = (mean - m_population_mean) / (std/qSqrt(n)); }} - p_value = getPValue(test, value, m_columns[0]->name(), i18n("%1",m_population_mean), df); + p_value = getPValue(test, value, m_columns[0]->name(), i18n("%1",m_population_mean), mean - m_population_mean, std, df); m_currTestName = i18n("

One Sample %1 Test for %2

", test_name, m_columns[0]->name()); printLine(2, i18n("Significance level is %1", m_significance_level), "blue"); @@ -458,6 +466,115 @@ } +/*************************************One Way Anova***************************************/ +void HypothesisTestPrivate::performOneWayAnova() { + // all standard variables and formulas are taken from this wikipedia page: + // https://en.wikipedia.org/wiki/One-way_analysis_of_variance + + // b stands for b/w groups + // w stands for within groups + clearGlobalVariables(); + int np, total_rows; + countPartitions(m_columns[0], np, total_rows); + + int* ni = new int[np]; + double* sum = new double[np]; + double* mean = new double[np]; + double* std = new double[np]; + QString* col_names = new QString[np]; + + QMap classname_to_index; + findStatsCategorical(m_columns[0], m_columns[1], ni, sum, mean, std, classname_to_index, np, total_rows); + + double y_bar = 0; + double s_b = 0; + int f_b = 0; + double ms_b = 0; + double s_w = 0; + int f_w = 0; + double ms_w = 0; + double f_value = 0; + double p_value = 0; + + // now finding mean of each group; + + for (int i = 0; i < np; i++) + y_bar += mean[i]; + y_bar = y_bar / np; + + for (int i = 0; i < np; i++) { + s_b += ni[i] * qPow( ( mean[i] - y_bar), 2); + if (ni[i] > 1) + s_w += qPow( std[i], 2)*(ni[i] - 1); + else + s_w += qPow( std[i], 2); + f_w += ni[i] - 1; + } + + f_b = np - 1; + ms_b = s_b / f_b; + + ms_w = s_w / f_w; + f_value = ms_b / ms_w; + + + p_value = nsl_stats_fdist_p(f_value, static_cast(np-1), f_w); + + QMapIterator i(classname_to_index); + while (i.hasNext()) { + i.next(); + col_names[i.value()-1] = i.key(); + } + + // now printing the statistics and result; + int row_count = np + 1, column_count = 5; + QVariant* row_major = new QVariant[row_count*column_count]; + // header data; + row_major[0] = ""; row_major[1] = "Ni"; row_major[2] = "Sum"; row_major[3] = "Mean"; row_major[4] = "Std"; + + // table data + for (int row_i = 1; row_i < row_count ; row_i++) { + row_major[row_i*column_count] = col_names[row_i - 1]; + row_major[row_i*column_count + 1] = ni[row_i - 1]; + row_major[row_i*column_count + 2] = sum[row_i - 1]; + row_major[row_i*column_count + 3] = QString::number( mean[row_i - 1], 'f', 3); + row_major[row_i*column_count + 4] = QString::number( std[row_i - 1], 'f', 3); + } + + m_stats_table = "

Group Summary Statistics

"; + + m_stats_table += getHtmlTable(row_count, column_count, row_major); + + m_stats_table += getLine(""); + m_stats_table += getLine(""); + m_stats_table += "

Grand Summary Statistics

"; + m_stats_table += getLine(""); + m_stats_table += getLine(i18n("Overall Mean is %1", y_bar)); + + row_count = 4; column_count = 3; + row_major->clear(); + + row_major[0] = ""; row_major[1] = "Between Groups"; row_major[2] = "Within Groups"; + + int base_index = 0; + base_index = 1*column_count; row_major[base_index + 0] = "Sum of Squares"; row_major[base_index + 1] = s_b; row_major[base_index + 2] = s_w; + base_index = 2*column_count; row_major[base_index + 0] = "Degree of Freedom"; row_major[base_index + 1] = f_b; row_major[base_index + 2] = f_w; + base_index = 3*column_count; row_major[base_index + 0] = "Mean Square Value"; row_major[base_index + 1] = ms_b; row_major[base_index + 2] = ms_w; + + m_stats_table += getHtmlTable(row_count, column_count, row_major); + + printLine(1, i18n("F Value is %1", f_value), "blue"); + printLine(2, i18n("P Value is %1 ", p_value), "green"); + + if (p_value <= m_significance_level) + q->m_view->setResultLine(2, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole); + else + q->m_view->setResultLine(2, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole); + + emit q->changed(); + return; +} + /**************************************Levene Test****************************************/ void HypothesisTestPrivate::performLeveneTest(bool categorical_variable) { QString test_name; @@ -581,7 +698,6 @@ for (int j = 0; j < n; j++) { name = m_columns[0]->textAt(j); value = m_columns[1]->valueAt(j); - if (std::isnan(value)) { n = j; break; @@ -840,7 +956,7 @@ } -double HypothesisTestPrivate::getPValue(const HypothesisTestPrivate::TestType &test, double &value, const QString &col1_name, const QString &col2_name, const int df) { +double HypothesisTestPrivate::getPValue(const HypothesisTestPrivate::TestType &test, double &value, const QString &col1_name, const QString &col2_name, const double mean, const double sp, const int df) { double p_value = 0; //TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol; @@ -848,18 +964,18 @@ case TestT: { switch (tail_type) { case HypothesisTest::TailNegative: - p_value = nsl_stats_tdist_p(value, df); + p_value = gsl_cdf_tdist_P(value, df); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("≥"), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("⋖"), col2_name), "blue"); break; case HypothesisTest::TailPositive: value *= -1; - p_value = nsl_stats_tdist_p(value, df); + p_value = gsl_cdf_tdist_P(value, df); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("≤"), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING(">"), col2_name), "blue"); break; case HypothesisTest::TailTwo: - p_value = nsl_stats_tdist_p(value, df) + nsl_stats_tdist_p(-1*value, df); + p_value = 2.*gsl_cdf_tdist_P(value, df); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("="), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("≠"), col2_name), "blue"); @@ -869,19 +985,18 @@ } case TestZ: { switch (tail_type) { case HypothesisTest::TailNegative: - p_value = nsl_stats_tdist_p(value, df); + p_value = gsl_cdf_gaussian_P(value - mean, sp); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("≥"), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("⋖"), col2_name), "blue"); break; case HypothesisTest::TailPositive: value *= -1; - p_value = nsl_stats_tdist_p(value, df); + p_value = nsl_stats_tdist_p(value - mean, sp); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("≤"), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING(">"), col2_name), "blue"); break; case HypothesisTest::TailTwo: - p_value = nsl_stats_tdist_p(value, df) + nsl_stats_tdist_p(-1*value, df); - + p_value = 2.*gsl_cdf_gaussian_P(value - mean, sp); printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("="), col2_name), "blue"); printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("≠"), col2_name), "blue"); break; @@ -949,9 +1064,13 @@ return table; } +QString HypothesisTestPrivate::getLine(const QString &msg, const QString &color) { + return i18n("

%2

", color, msg); +} + void HypothesisTestPrivate::printLine(const int &index, const QString &msg, const QString &color) { - q->m_view->setResultLine(index, i18n("

%2

", color, msg)); + q->m_view->setResultLine(index, getLine(msg, color)); return; } diff --git a/src/backend/hypothesis_test/HypothesisTestPrivate.h b/src/backend/hypothesis_test/HypothesisTestPrivate.h --- a/src/backend/hypothesis_test/HypothesisTestPrivate.h +++ b/src/backend/hypothesis_test/HypothesisTestPrivate.h @@ -48,6 +48,7 @@ void performTwoSampleIndependentTest(TestType test, bool categorical_variable = false, bool equal_variance = true); void performTwoSamplePairedTest(TestType test); void performOneSampleTest(TestType test); + void performOneWayAnova(); void performLeveneTest(bool categorical_variable); @@ -72,9 +73,10 @@ ErrorType findStatsPaired(const Column *column1, const Column *column2, int &count, double &sum, double &mean, double &std); ErrorType findStatsCategorical(Column *column1, Column *column2, int n[], double sum[], double mean[], double std[], QMap &col_name, const int &np, const int &total_rows); - double getPValue(const TestType &test, double &value, const QString &col1_name, const QString &col2_name, const int df); + double getPValue(const TestType &test, double &value, const QString &col1_name, const QString &col2_name, const double mean, const double sp, const int df); QString getHtmlTable(int row, int column, QVariant *row_major); + QString getLine(const QString &msg, const QString &color = "black"); void printLine(const int &index, const QString &msg, const QString &color = "black"); void printError(const QString &error_msg); void clearGlobalVariables(); diff --git a/src/kdefrontend/dockwidgets/HypothesisTestDock.h b/src/kdefrontend/dockwidgets/HypothesisTestDock.h --- a/src/kdefrontend/dockwidgets/HypothesisTestDock.h +++ b/src/kdefrontend/dockwidgets/HypothesisTestDock.h @@ -39,6 +39,8 @@ class TreeViewComboBox; class KConfig; class QScrollArea; +class QStandardItemModel; +class QStandardItem; class HypothesisTestDock : public QWidget { Q_OBJECT @@ -75,19 +77,34 @@ bool two_sample_independent{false}; bool two_sample_paired{false}; bool one_sample{false}; + bool anova{false}; + bool one_way{false}; + bool two_way{false}; QScrollArea* scroll_dock; + void countPartitions(Column *column, int &np, int &total_rows); + void setColumnsComboBoxModel(Spreadsheet* spreadsheet); + void setColumnsComboBoxView(); + bool nonEmptySelectedColumns(); + + QStringList test_type_t_z; + QStringList test_type_anova; + + QStringList only_values_cols; + QStringList two_categorical_cols; + QStringList more_than_two_categorical_cols; private slots: //SLOTs for changes triggered in PivotTableDock // void nameChanged(); // void commentChanged(); void dataSourceTypeChanged(int); + void showTestType(); void showHypothesisTest(); void doHypothesisTest(); void performLeveneTest(); void spreadsheetChanged(const QModelIndex&); - void col1CatIndexChanged(int index); + void col1IndexChanged(int index); // void connectionChanged(); // void tableChanged(); // void showDatabaseManager(); diff --git a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp b/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp --- a/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp +++ b/src/kdefrontend/dockwidgets/HypothesisTestDock.cpp @@ -1,4 +1,4 @@ -/*************************************************************************** +/*************************************************************************** File : HypothesisTestDock.cpp Project : LabPlot Description : widget for hypothesis test properties @@ -45,6 +45,9 @@ #include #include #include + +#include +#include /*! \class HypothesisTestDock \brief Provides a dock (widget) for hypothesis testing: @@ -66,20 +69,19 @@ ui.bDatabaseManager->setToolTip(i18n("Manage connections")); m_configPath = QStandardPaths::standardLocations(QStandardPaths::AppDataLocation).constFirst() + "sql_connections"; - - - // adding item to tests and testtype combo box; ui.cbTest->addItem(i18n("T Test")); ui.cbTest->addItem(i18n("Z Test")); + ui.cbTest->addItem(i18n("Anova")); + + test_type_t_z.append("Two Sample Independent"); + test_type_t_z.append("Two Sample Paired"); + test_type_t_z.append("One Sample"); - ui.cbTestType->addItem(i18n("Two Sample Independent")); - ui.cbTestType->addItem(i18n("Two Sample Paired")); - ui.cbTestType->addItem(i18n("One Sample")); + test_type_anova.append("One Way"); + test_type_anova.append("Two Way"); // making all test blocks invisible at starting. - ui.lCol1Categorical->setVisible(false); - ui.cbCol1Categorical->setVisible(false); ui.pbLeveneTest->setVisible(false); ui.chbCategorical->setVisible(false); ui.lCol1->setVisible(false); @@ -184,7 +186,7 @@ // ui.bRemoveColumn->setEnabled(!ui.lwColumns->selectedItems().isEmpty()); // }); - connect(ui.cbTest, static_cast(&QComboBox::activated), this, &HypothesisTestDock::showHypothesisTest); + connect(ui.cbTest, static_cast(&QComboBox::activated), this, &HypothesisTestDock::showTestType); connect(ui.cbTestType, static_cast(&QComboBox::activated), this, &HypothesisTestDock::showHypothesisTest); // connect(ui.cbTest, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); // connect(ui.cbTestType, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::showHypothesisTest); @@ -197,7 +199,7 @@ connect(ui.rbH1OneTail2, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1OneTail2Toggled); connect(ui.rbH1TwoTail, &QRadioButton::toggled, this, &HypothesisTestDock::onRbH1TwoTailToggled); - connect(ui.cbCol1Categorical, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::col1CatIndexChanged); + connect(ui.cbCol1, static_cast(&QComboBox::currentIndexChanged), this, &HypothesisTestDock::col1IndexChanged); } void HypothesisTestDock::setHypothesisTest(HypothesisTest* HypothesisTest) { @@ -225,17 +227,7 @@ // else // ui.cbConnection->setCurrentIndex(ui.cbConnection->findText(m_hypothesisTest->dataSourceConnection())); - //clearing all cbCol* - ui.cbCol1->clear(); - ui.cbCol2->clear(); - ui.cbCol1Categorical->clear(); - for (auto* col : m_hypothesisTest->dataSourceSpreadsheet()->children()) { - ui.cbCol1Categorical->addItem(col->name()); - if (col->columnMode() == AbstractColumn::Integer || col->columnMode() == AbstractColumn::Numeric) { - ui.cbCol2->addItem(col->name()); - ui.cbCol1->addItem(col->name()); - } - } + setColumnsComboBoxModel(m_hypothesisTest->dataSourceSpreadsheet()); this->dataSourceTypeChanged(ui.cbDataSourceType->currentIndex()); @@ -257,25 +249,36 @@ // m_initializing = false; } -void HypothesisTestDock::showHypothesisTest() { +void HypothesisTestDock::showTestType() { ttest = ui.cbTest->currentText() == "T Test"; ztest = ui.cbTest->currentText() == "Z Test"; + anova = ui.cbTest->currentText() == "Anova"; + + ui.cbTestType->clear(); + + if (ttest || ztest) + ui.cbTestType->addItems(test_type_t_z); + if (anova) + ui.cbTestType->addItems(test_type_anova); + showHypothesisTest(); +} + +void HypothesisTestDock::showHypothesisTest() { + one_way = ui.cbTestType->currentText() == "One Way"; + two_way = ui.cbTestType->currentText() == "Two Way"; two_sample_independent = ui.cbTestType->currentText() == "Two Sample Independent"; two_sample_paired = ui.cbTestType->currentText() == "Two Sample Paired"; one_sample = ui.cbTestType->currentText() == "One Sample"; - ui.lCol1Categorical->setVisible(two_sample_independent); - ui.cbCol1Categorical->setVisible(two_sample_independent); - ui.lCol1->setVisible(two_sample_paired); - ui.cbCol1->setVisible(two_sample_paired); - ui.lCol2->setVisible(two_sample_independent || two_sample_paired || one_sample); - ui.cbCol2->setVisible(two_sample_independent || two_sample_paired || one_sample); + ui.lCol1->setVisible(anova || two_sample_independent || two_sample_paired); + ui.cbCol1->setVisible(anova || two_sample_independent || two_sample_paired); + ui.lCol2->setVisible(anova || two_sample_independent || two_sample_paired || one_sample); + ui.cbCol2->setVisible(anova || two_sample_independent || two_sample_paired || one_sample); ui.chbEqualVariance->setVisible(ttest && two_sample_independent); ui.chbCategorical->setVisible(ttest && two_sample_independent); - ui.pbLeveneTest->setVisible(ttest && two_sample_independent); + ui.pbLeveneTest->setVisible(anova || (ttest && two_sample_independent)); ui.chbEqualVariance->setChecked(true); - ui.pbPerformTest->setEnabled(two_sample_independent || two_sample_paired || one_sample); ui.rbH1OneTail2->setVisible(two_sample_independent || two_sample_paired || one_sample); ui.rbH1OneTail1->setVisible(two_sample_independent || two_sample_paired || one_sample); @@ -290,14 +293,17 @@ ui.lMuo->setVisible(one_sample); ui.leMuo->setVisible(one_sample); - ui.lAlpha->setVisible(two_sample_independent || two_sample_paired || one_sample); - ui.leAlpha->setVisible(two_sample_independent || two_sample_paired || one_sample); + ui.lAlpha->setVisible(anova || two_sample_independent || two_sample_paired || one_sample); + ui.leAlpha->setVisible(anova || two_sample_independent || two_sample_paired || one_sample); ui.leMuo->setText( i18n("%1", population_mean)); ui.leAlpha->setText( i18n("%1", significance_level)); - if (two_sample_independent) - ui.lCol2->setText( i18n("Independent Variable")); + setColumnsComboBoxView(); + + ui.pbPerformTest->setEnabled(nonEmptySelectedColumns() && + (anova || + two_sample_independent || two_sample_paired || one_sample)); } void HypothesisTestDock::doHypothesisTest() { @@ -308,7 +314,7 @@ QStringList cols; if(ttest) { if(two_sample_independent) { - cols << ui.cbCol1Categorical->currentText() << ui.cbCol2->currentText(); + cols << ui.cbCol1->currentText() << ui.cbCol2->currentText(); m_hypothesisTest->setColumns(cols); m_hypothesisTest->performTwoSampleIndependentTTest(ui.chbCategorical->isChecked(), ui.chbEqualVariance->isChecked()); } @@ -326,7 +332,7 @@ } else if(ztest) { if(two_sample_independent) { - cols << ui.cbCol1Categorical->currentText(); + cols << ui.cbCol1->currentText(); cols << ui.cbCol2->currentText(); m_hypothesisTest->setColumns(cols); m_hypothesisTest->performTwoSampleIndependentZTest(); @@ -343,12 +349,19 @@ m_hypothesisTest->performOneSampleZTest(); } } + else if(anova) { + QStringList cols; + if(one_way) { + cols << ui.cbCol1->currentText() << ui.cbCol2->currentText(); + m_hypothesisTest->setColumns(cols); + m_hypothesisTest->performOneWayAnova(); + } + } } void HypothesisTestDock::performLeveneTest() { - QStringList cols; - cols << ui.cbCol1Categorical->currentText() << ui.cbCol2->currentText(); + cols << ui.cbCol1->currentText() << ui.cbCol2->currentText(); m_hypothesisTest->setColumns(cols); m_hypothesisTest->setSignificanceLevel(ui.leAlpha->text()); @@ -504,25 +517,12 @@ void HypothesisTestDock::spreadsheetChanged(const QModelIndex& index) { auto* aspect = static_cast(index.internalPointer()); Spreadsheet* spreadsheet = dynamic_cast(aspect); - - //clear the previous definitions of combo-box columns - //clearing all cbCol* - ui.cbCol1->clear(); - ui.cbCol2->clear(); - ui.cbCol1Categorical->clear(); - for (auto* col : m_hypothesisTest->dataSourceSpreadsheet()->children()) { - ui.cbCol1Categorical->addItem(col->name()); - if (col->columnMode() == AbstractColumn::Integer || col->columnMode() == AbstractColumn::Numeric) { - ui.cbCol2->addItem(col->name()); - ui.cbCol1->addItem(col->name()); - } - } - + setColumnsComboBoxModel(spreadsheet); m_hypothesisTest->setDataSourceSpreadsheet(spreadsheet); } // currently no need of this slot -void HypothesisTestDock::col1CatIndexChanged(int index) { +void HypothesisTestDock::col1IndexChanged(int index) { if (index < 0) return; if (two_sample_paired) { @@ -530,7 +530,7 @@ return; } - QString selected_text = ui.cbCol1Categorical->currentText(); + QString selected_text = ui.cbCol1->currentText(); Column* col1 = m_hypothesisTest->dataSourceSpreadsheet()->column(selected_text); if (col1->columnMode() == AbstractColumn::Integer || col1->columnMode() == AbstractColumn::Numeric) { @@ -580,7 +580,7 @@ // if (DatabaseManagerWidget::isFileDB(driver)) { // if (!QFile::exists(dbName)) { // KMessageBox::error(this, i18n("Couldn't find the database file '%1'. Please check the connection settings.", dbName), -// i18n("Connection Failed")); +// appendRow i18n("Connection Failed")); // return; // } else // m_db.setDatabaseName(dbName); @@ -698,3 +698,84 @@ ui.rbH0TwoTail->setChecked(true); m_hypothesisTest->setTailType(HypothesisTest::TailTwo); } + + +/**************************************Helper Functions********************************************/ + +void HypothesisTestDock::countPartitions(Column *column, int &np, int &total_rows) { + total_rows = column->rowCount(); + np = 0; + QString cell_value; + QMap discovered_categorical_var; + + AbstractColumn::ColumnMode original_col_mode = column->columnMode(); + column->setColumnMode(AbstractColumn::Text); + + for (int i = 0; i < total_rows; i++) { + cell_value = column->textAt(i); + + if (cell_value.isEmpty()) { + total_rows = i; + break; + } + + if (discovered_categorical_var[cell_value]) + continue; + + discovered_categorical_var[cell_value] = true; + np++; + } + column->setColumnMode(original_col_mode); +} + +void HypothesisTestDock::setColumnsComboBoxModel(Spreadsheet* spreadsheet) { + only_values_cols.clear(); + two_categorical_cols.clear(); + more_than_two_categorical_cols.clear(); + + for (auto* col : spreadsheet->children()) { + if (col->columnMode() == AbstractColumn::Integer || col->columnMode() == AbstractColumn::Numeric) + only_values_cols.append(col->name()); + else { + int np = 0, n_rows = 0; + countPartitions(col, np, n_rows); + if (np <= 1) + continue; + else if (np == 2) + two_categorical_cols.append(col->name()); + else + more_than_two_categorical_cols.append(col->name()); + } + } + setColumnsComboBoxView(); + showHypothesisTest(); +} + +void HypothesisTestDock::setColumnsComboBoxView() { + + ui.cbCol1->clear(); + ui.cbCol2->clear(); + if (two_sample_independent) { + ui.cbCol1->addItems(only_values_cols); + ui.cbCol1->addItems(two_categorical_cols); + + ui.cbCol2->addItems(only_values_cols); + } else if (two_sample_paired) { + ui.cbCol1->addItems(only_values_cols); + ui.cbCol2->addItems(only_values_cols); + } else if (one_sample) + ui.cbCol1->addItems(only_values_cols); + else if (anova) { + ui.cbCol1->addItems(two_categorical_cols); + ui.cbCol1->addItems(more_than_two_categorical_cols); + ui.cbCol2->addItems(only_values_cols); + } +} + +bool HypothesisTestDock::nonEmptySelectedColumns() { + if (ui.cbCol1->isVisible() && ui.cbCol1->count() < 1) + return false; + if (ui.cbCol2->isVisible() && ui.cbCol2->count() < 1) + return false; + return true; +} diff --git a/src/kdefrontend/ui/dockwidgets/hypothesistestdock.ui b/src/kdefrontend/ui/dockwidgets/hypothesistestdock.ui --- a/src/kdefrontend/ui/dockwidgets/hypothesistestdock.ui +++ b/src/kdefrontend/ui/dockwidgets/hypothesistestdock.ui @@ -349,46 +349,33 @@ - 16 - 373 + 20 + 350 394 - 50 + 73 - - - - Independent Variable - - - Qt::AlignCenter - - + + - + Independent Variable - - + + + + + - Variable + Independent Variable - - - - - - - - -