diff --git a/hypothesisTest/HypothesisTest.h b/hypothesisTest/HypothesisTest.h
new file mode 100644
--- /dev/null
+++ b/hypothesisTest/HypothesisTest.h
@@ -0,0 +1,100 @@
+/***************************************************************************
+ File : HypothesisTest.h
+ Project : LabPlot
+ Description : Doing Hypothesis-Test on data provided
+ --------------------------------------------------------------------
+ Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
+
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the Free Software *
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
+ * Boston, MA 02110-1301 USA *
+ * *
+ ***************************************************************************/
+
+#ifndef HYPOTHESISTEST_H
+#define HYPOTHESISTEST_H
+
+#include "backend/core/AbstractPart.h"
+#include "backend/lib/macros.h"
+
+class HypothesisTestPrivate;
+class HypothesisTestView;
+class Spreadsheet;
+class QString;
+class Column;
+class QLayout;
+
+/*!
+ * \class HypothesisTest
+ * \brief Part (AbstractPart subclass) that runs statistical hypothesis tests on selected columns.
+ *
+ * The public methods are thin wrappers: all state and every calculation live in
+ * HypothesisTestPrivate, reached through the d-pointer \c d.
+ */
+class HypothesisTest : public AbstractPart {
+ Q_OBJECT
+
+public:
+ explicit HypothesisTest(const QString& name);
+ ~HypothesisTest() override;
+
+ // kind of data source the test reads from
+ enum DataSourceType {DataSourceSpreadsheet, DataSourceDatabase};
+ // direction of the alternative hypothesis; TailTwo is the two-sided test
+ enum TailType {TailPositive, TailNegative, TailTwo};
+
+ void setDataSourceType(DataSourceType type);
+ DataSourceType dataSourceType() const;
+ void setDataSourceSpreadsheet(Spreadsheet* spreadsheet);
+
+ // column selection, either as ready-made column objects or by column name
+ // NOTE(review): the QVector element type seems to have been lost in this listing
+ // (presumably QVector<Column*>, as the value is assigned to the private m_columns) — confirm.
+ void setColumns(const QVector& cols);
+ void setColumns(QStringList cols);
+ // names collected from the data source spreadsheet when it was set
+ QStringList allColumns();
+ void setTailType(TailType tailType);
+ TailType tailType();
+ // both setters store the value converted with QVariant::toDouble()
+ void setPopulationMean(QVariant populationMean);
+ void setSignificanceLevel(QVariant alpha);
+ // name of the current test and the statistics table produced by the last test run
+ QString testName();
+ QString statsTable();
+
+ // each perform* method sets a test name and forwards to the private implementation
+ void performTwoSampleIndependentTTest(bool categorical_variable, bool equal_variance);
+ void performTwoSamplePairedTTest();
+ void performOneSampleTTest();
+ void performTwoSampleIndependentZTest();
+ void performTwoSamplePairedZTest();
+ void performOneSampleZTest();
+ void performOneWayAnova();
+
+ void performLeveneTest(bool categorical_variable);
+ //virtual methods
+// QIcon icon() const override;
+ QMenu* createContextMenu() override;
+ QWidget* view() const override;
+
+ bool exportView() const override;
+ bool printView() override;
+ bool printPreview() const override;
+
+ void save(QXmlStreamWriter*) const override;
+ bool load(XmlStreamReader*, bool preview) override;
+
+ Spreadsheet* dataSourceSpreadsheet() const;
+private:
+ // private implementation; created in the constructor, deleted in the destructor
+ HypothesisTestPrivate* const d;
+ // starts out as nullptr; the private implementation writes the result lines through it
+ mutable HypothesisTestView* m_view{nullptr};
+ friend class HypothesisTestPrivate;
+
+signals:
+ // emitted by the private implementation at the end of every test run, including the error paths
+ void changed();
+ void requestProjectContextMenu(QMenu*);
+ void dataSourceTypeChanged(HypothesisTest::DataSourceType);
+ void dataSourceSpreadsheetChanged(Spreadsheet*);
+};
+
+#endif // HYPOTHESISTEST_H
diff --git a/hypothesisTest/HypothesisTest.cpp b/hypothesisTest/HypothesisTest.cpp
new file mode 100644
--- /dev/null
+++ b/hypothesisTest/HypothesisTest.cpp
@@ -0,0 +1,1192 @@
+/***************************************************************************
+ File : HypothesisTest.cpp
+ Project : LabPlot
+ Description : Doing Hypothesis-Test on data provided
+ --------------------------------------------------------------------
+ Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
+
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the Free Software *
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
+ * Boston, MA 02110-1301 USA *
+ * *
+ ***************************************************************************/
+
+#include "HypothesisTest.h"
+#include "HypothesisTestPrivate.h"
+#include "kdefrontend/hypothesisTest/HypothesisTestView.h"
+#include "backend/spreadsheet/Spreadsheet.h"
+#include "backend/core/column/Column.h"
+#include "backend/lib/macros.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+extern "C" {
+#include "backend/nsl/nsl_stats.h"
+}
+
+/*!
+ * Creates the part with the given \c name and allocates its private implementation.
+ */
+HypothesisTest::HypothesisTest(const QString& name)
+	: AbstractPart(name), d(new HypothesisTestPrivate(this)) {
+}
+
+/*!
+ * Releases the private implementation that was allocated in the constructor.
+ */
+HypothesisTest::~HypothesisTest() {
+	delete d;
+}
+
+/*!
+ * Stores the kind of data source to be used. No signal is emitted here.
+ */
+void HypothesisTest::setDataSourceType(DataSourceType type) {
+	// assigning an unchanged value is a no-op, so no comparison is needed
+	d->dataSourceType = type;
+}
+
+/*!
+ * Returns the kind of data source currently stored in the private object.
+ */
+HypothesisTest::DataSourceType HypothesisTest::dataSourceType() const {
+	return d->dataSourceType;
+}
+
+/*!
+ * Makes \c spreadsheet the data source, unless it already is the current one.
+ */
+void HypothesisTest::setDataSourceSpreadsheet(Spreadsheet* spreadsheet) {
+	if (spreadsheet == d->dataSourceSpreadsheet)
+		return;
+
+	d->setDataSourceSpreadsheet(spreadsheet);
+}
+
+// Replaces the current column selection with cols; the vector is stored as-is.
+// NOTE(review): the QVector element type seems to have been lost in this listing
+// (presumably QVector<Column*>, matching what the QStringList overload appends) — confirm.
+void HypothesisTest::setColumns(const QVector& cols) {
+ d->m_columns = cols;
+}
+
+/*!
+ * Selects the columns by name; the lookup in the data source spreadsheet
+ * is done by the private implementation.
+ */
+void HypothesisTest::setColumns(QStringList cols) {
+	d->setColumns(cols);
+}
+
+/*!
+ * Returns the column names that were collected when the data source spreadsheet was set.
+ */
+QStringList HypothesisTest::allColumns() {
+	return d->all_columns;
+}
+
+/*!
+ * Stores which tail (positive, negative or two-sided) the following tests use.
+ */
+void HypothesisTest::setTailType(HypothesisTest::TailType tailType) {
+	d->tail_type = tailType;
+}
+
+/*!
+ * Returns the tail type that is currently stored.
+ */
+HypothesisTest::TailType HypothesisTest::tailType() {
+	return d->tail_type;
+}
+
+/*!
+ * Stores the assumed population mean; the variant is converted with QVariant::toDouble().
+ */
+void HypothesisTest::setPopulationMean(QVariant populationMean) {
+	const double mean_value = populationMean.toDouble();
+	d->m_population_mean = mean_value;
+}
+
+/*!
+ * Stores the significance level; the variant is converted with QVariant::toDouble().
+ */
+void HypothesisTest::setSignificanceLevel(QVariant alpha) {
+	const double level = alpha.toDouble();
+	d->m_significance_level = level;
+}
+
+
+/*!
+ * Returns the name of the current test as set by the perform* methods.
+ */
+QString HypothesisTest::testName() {
+	return d->m_currTestName;
+}
+
+/*!
+ * Returns the statistics table text produced by the last test run.
+ */
+QString HypothesisTest::statsTable() {
+	return d->m_stats_table;
+}
+
+// Sets a default test name and runs the two sample independent test in its T variant.
+// Both flags are passed through unchanged to the private implementation.
+// NOTE(review): the string literal below is split across two lines in this listing
+// (markup inside it may have been lost) — confirm against the original file.
+void HypothesisTest::performTwoSampleIndependentTTest(bool categorical_variable, bool equal_variance) {
+ d->m_currTestName = i18n( "Two Sample Independent T Test
");
+ d->performTwoSampleIndependentTest(HypothesisTestPrivate::TestT, categorical_variable, equal_variance);
+}
+
+// Sets a default test name and runs the two sample paired test in its T variant.
+// NOTE(review): the string literal below is split across two lines in this listing
+// (markup inside it may have been lost) — confirm against the original file.
+void HypothesisTest::performTwoSamplePairedTTest() {
+	d->m_currTestName = i18n( "Two Sample Paired T Test
");
+	d->performTwoSamplePairedTest(HypothesisTestPrivate::TestT);
+}
+
+// Sets a default test name and runs the one sample test in its T variant.
+// NOTE(review): the string literal below is split across two lines in this listing
+// (markup inside it may have been lost) — confirm against the original file.
+void HypothesisTest::performOneSampleTTest() {
+ d->m_currTestName = i18n( "One Sample T Test
");
+ d->performOneSampleTest(HypothesisTestPrivate::TestT);
+}
+
+// Sets a default test name and runs the two sample independent test in its Z variant.
+// Only the test type is passed, so the remaining parameters of the private method
+// must come from defaults declared elsewhere (not visible here).
+// NOTE(review): the string literal below is split across two lines in this listing
+// (markup inside it may have been lost) — confirm against the original file.
+void HypothesisTest::performTwoSampleIndependentZTest() {
+ d->m_currTestName = i18n( "Two Sample Independent Z Test
");
+ d->performTwoSampleIndependentTest(HypothesisTestPrivate::TestZ);
+}
+
+// Sets a default test name and runs the two sample paired test in its Z variant.
+// NOTE(review): the string literal below is split across two lines in this listing
+// (markup inside it may have been lost) — confirm against the original file.
+void HypothesisTest::performTwoSamplePairedZTest() {
+ d->m_currTestName = i18n( "Two Sample Paired Z Test
");
+ d->performTwoSamplePairedTest(HypothesisTestPrivate::TestZ);
+}
+
+// Sets a default test name and runs the one sample test in its Z variant.
+// NOTE(review): the string literal below is split across two lines in this listing
+// (markup inside it may have been lost) — confirm against the original file.
+void HypothesisTest::performOneSampleZTest() {
+ d->m_currTestName = i18n( "One Sample Z Test
");
+ d->performOneSampleTest(HypothesisTestPrivate::TestZ);
+}
+
+// Sets the test name and runs the one way ANOVA of the private implementation.
+// NOTE(review): the string literal below is split across two lines in this listing
+// (markup inside it may have been lost) — confirm against the original file.
+void HypothesisTest::performOneWayAnova() {
+ d->m_currTestName = i18n( "One Way Anova
");
+ d->performOneWayAnova();
+}
+
+// Sets the test name and runs Levene's test of the private implementation;
+// categorical_variable is passed through unchanged.
+// NOTE(review): the string literal below is split across two lines in this listing
+// (markup inside it may have been lost) — confirm against the original file.
+void HypothesisTest::performLeveneTest(bool categorical_variable) {
+ d->m_currTestName = i18n( "Levene Test for Equality of Variance
");
+ d->performLeveneTest(categorical_variable);
+}
+
+/******************************************************************************
+ * Private Implementations
+ * ****************************************************************************/
+
+//TODO: round off numbers while printing
+//TODO: backend of z test;
+
+
+/*!
+ * Remembers the public object \c owner as back pointer \c q.
+ */
+HypothesisTestPrivate::HypothesisTestPrivate(HypothesisTest* owner)
+	: q(owner) {
+}
+
+// nothing to release explicitly
+HypothesisTestPrivate::~HypothesisTestPrivate() = default;
+
+/*!
+ * Sets the spreadsheet the tests read from and caches its row count, column count
+ * and the names of its children in \c all_columns.
+ *
+ * The cached values are reset first, so switching to another spreadsheet no longer
+ * appends its names to those of the previous one. Passing nullptr clears the cache.
+ */
+void HypothesisTestPrivate::setDataSourceSpreadsheet(Spreadsheet* spreadsheet) {
+	dataSourceSpreadsheet = spreadsheet;
+
+	all_columns.clear();
+	m_rowCount = 0;
+	m_columnCount = 0;
+
+	if (!dataSourceSpreadsheet)
+		return;
+
+	//setting rows and columns count;
+	m_rowCount = dataSourceSpreadsheet->rowCount();
+	m_columnCount = dataSourceSpreadsheet->columnCount();
+
+	// NOTE(review): a template argument of children() seems to have been lost in this
+	// listing (presumably children<Column>()) — confirm against the original file.
+	for (auto* col : dataSourceSpreadsheet->children())
+		all_columns << col->name();
+}
+
+
+/*!
+ * Replaces the column selection with the columns of the data source spreadsheet
+ * named in \c cols.
+ *
+ * Empty names and names for which the lookup yields no column are skipped, so
+ * \c m_columns never contains null pointers. Without a data source spreadsheet the
+ * selection is left empty.
+ */
+void HypothesisTestPrivate::setColumns(QStringList cols) {
+	m_columns.clear();
+
+	if (!dataSourceSpreadsheet)
+		return;
+
+	for (const QString& col : cols) {
+		if (col.isEmpty())
+			continue;
+
+		// the columns are owned by the spreadsheet, only the pointer is stored here
+		Column* column = dataSourceSpreadsheet->column(col);
+		if (column)
+			m_columns.append(column);
+	}
+}
+
+
+/**************************Two Sample Independent *************************************/
+
+/*!
+ * Two sample independent test (T or Z) on the two selected columns.
+ *
+ * Data layout:
+ *  - \c categorical_variable false and first column numeric: each column is one sample.
+ *  - otherwise: the first column holds the class labels (exactly two distinct labels
+ *    are required) and the second column holds the values.
+ *
+ * For the T test \c equal_variance selects between the pooled variance formula and the
+ * Welch–Satterthwaite approximation of the degrees of freedom.
+ * The summary table goes to \c m_stats_table, the result lines to the view, and
+ * \c changed() is emitted on every path, including the error paths.
+ */
+void HypothesisTestPrivate::performTwoSampleIndependentTest(TestType test, bool categorical_variable, bool equal_variance) {
+	QString test_name;
+
+	double value = 0;
+	int df = 0;
+	double sp = 0;
+	clearTestView();
+
+	if (m_columns.size() != 2) {
+		printError("Inappropriate number of columns selected");
+		emit q->changed();
+		return;
+	}
+
+	int n[2];
+	double sum[2], mean[2], std[2];
+
+	QString col1_name = m_columns[0]->name();
+	QString col2_name = m_columns[1]->name();
+
+	if (!categorical_variable && isNumericOrInteger(m_columns[0])) {
+		// every column is a sample of its own
+		for (int i = 0; i < 2; i++) {
+			findStats(m_columns[i], n[i], sum[i], mean[i], std[i]);
+
+			if (n[i] < 1) {
+				printError("At least one of selected column is empty");
+				emit q->changed();
+				return;
+			}
+		}
+	} else {
+		// first column: class labels, second column: values
+		QMap<QString, int> col_name;
+		QString base_col_name = "";
+		int np = 0;
+		int total_rows = 0;
+
+		countPartitions(m_columns[0], np, total_rows);
+		if (np != 2) {
+			printError( i18n("Number of Categorical Variable in Column %1 is not equal to 2", m_columns[0]->name()));
+			emit q->changed();
+			return;
+		}
+
+		if (isNumericOrInteger(m_columns[0]))
+			base_col_name = m_columns[0]->name();
+
+		const ErrorType error_code = findStatsCategorical(m_columns[0], m_columns[1], n, sum, mean, std, col_name, np, total_rows);
+
+		switch (error_code) {
+		case ErrorUnqualSize: {
+			printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name()));
+			emit q->changed();
+			return;
+		}
+		case ErrorEmptyColumn: {
+			printError("At least one of selected column is empty");
+			emit q->changed();
+			return;
+		}
+		case NoError:
+			break;
+		}
+
+		// the class that was found first (index 1) becomes sample 1, the other one sample 2
+		QMapIterator<QString, int> i(col_name);
+		while (i.hasNext()) {
+			i.next();
+			if (i.value() == 1)
+				col1_name = base_col_name + " " + i.key();
+			else
+				col2_name = base_col_name + " " + i.key();
+		}
+	}
+
+	QVariant row_major[] = {"", "N", "Sum", "Mean", "Std",
+							col1_name, n[0], sum[0], mean[0], std[0],
+							col2_name, n[1], sum[1], mean[1], std[1]};
+
+	m_stats_table = getHtmlTable(3, 5, row_major);
+
+	switch (test) {
+	case TestT: {
+		test_name = "T";
+
+		if (equal_variance) {
+			df = n[0] + n[1] - 2;
+			sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) +
+						(n[1]-1) * gsl_pow_2(std[1]) ) / df );
+			value = (mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1]));
+			printLine(9, "Assumption: Equal Variance b/w both population means");
+		} else {
+			double temp_val;
+			temp_val = gsl_pow_2( gsl_pow_2(std[0]) / n[0] + gsl_pow_2(std[1]) / n[1]);
+			temp_val = temp_val / ( (gsl_pow_2( (gsl_pow_2(std[0]) / n[0]) ) / (n[0]-1)) +
+									(gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1)));
+			df = qRound(temp_val);
+
+			value = (mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) +
+												  (gsl_pow_2(std[1])/n[1])));
+			printLine(9, "Assumption: UnEqual Variance b/w both population means");
+		}
+		break;
+	}
+	case TestZ: {
+		test_name = "Z";
+		// df stays 0 for the Z test (it is reported like that below), so the pooled
+		// standard deviation needs its own divisor instead of dividing by df
+		const int pooled_df = n[0] + n[1] - 2;
+		sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / pooled_df);
+		value = (mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1]));
+		break;
+	}
+	}
+
+	// NOTE(review): the string literal below is split across two lines in this listing
+	// (markup inside it may have been lost) — confirm against the original file.
+	m_currTestName = i18n( "Two Sample Independent %1 Test for %2 vs %3
", test_name, col1_name, col2_name);
+	// getPValue() also prints the hypotheses and may flip the sign of value,
+	// so it has to run before value is printed
+	const double p_value = getPValue(test, value, col1_name, col2_name, (mean[0] - mean[1]), sp, df);
+
+	printLine(2, i18n("Significance level is %1", m_significance_level), "blue");
+	printLine(4, i18n("%1 Value is %2 ", test_name, value), "green");
+	printLine(5, i18n("P Value is %1 ", p_value), "green");
+	printLine(6, i18n("Degree of Freedom is %1", df), "green");
+
+	if (p_value <= m_significance_level)
+		q->m_view->setResultLine(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole);
+	else
+		q->m_view->setResultLine(5, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole);
+
+	emit q->changed();
+	return;
+}
+
+/********************************Two Sample Paired ***************************************/
+
+// Two sample paired test on the two selected columns: the statistics are taken over the
+// row-wise differences column 1 - column 2 (see findStatsPaired()).
+// Results are written to m_stats_table and the view; changed() is emitted on every exit.
+// NOTE(review): this listing is damaged inside the switch (test) statement below — part of
+// the code between the "Degree of Freedom" line and the getPValue() call is missing.
+// Restore it from the original file before changing this function.
+void HypothesisTestPrivate::performTwoSamplePairedTest(TestType test) {
+ QString test_name;
+ int n;
+ double sum, mean, std;
+ double value;
+ int df = 0;
+ double p_value = 0;
+ clearTestView();
+
+ if (m_columns.size() != 2) {
+ printError("Inappropriate number of columns selected");
+ emit q->changed();
+ return;
+ }
+
+ // NOTE(review): the loop index is not used, m_columns[0] is checked in both iterations;
+ // presumably m_columns[i] was intended — confirm.
+ for (int i = 0; i < 2; i++) {
+ if ( !isNumericOrInteger(m_columns[0])) {
+ printError("select only columns with numbers");
+ emit q->changed();
+ return;
+ }
+ }
+
+ ErrorType error_code = findStatsPaired(m_columns[0], m_columns[1], n, sum, mean, std);
+
+ switch (error_code) {
+ case ErrorUnqualSize: {
+ printError("both columns are having different sizes");
+ emit q->changed();
+ return;
+ }
+ case ErrorEmptyColumn: {
+ printError("columns are empty");
+ emit q->changed();
+ return;
+ }
+ case NoError:
+ break;
+ }
+
+ // NOTE(review): findStatsPaired() reports both conditions through its return value
+ // (handled above) and never sets the count to -1, so the next two checks look unreachable.
+ if (n == -1) {
+ printError("both columns are having different sizes");
+ emit q->changed();
+ return;
+ }
+
+ if (n < 1) {
+ printError("columns are empty");
+ emit q->changed();
+ return;
+ }
+
+ QVariant row_major[] = {"", "N", "Sum", "Mean", "Std",
+ "difference", n, sum, mean, std};
+
+ m_stats_table = getHtmlTable(2, 5, row_major);
+
+ switch (test) {
+ case TestT: {
+ value = mean / (std / qSqrt(n));
+ df = n - 1;
+ test_name = "T";
+ // NOTE(review): damaged region starts in the next line (see the note at the top)
+ printLine(6, i18n("Degree of Freedom is %1
name(), i18n("%1", m_population_mean), mean, std, df);
+ // NOTE(review): the name says "One Sample" although this is the two sample paired test — confirm
+ m_currTestName = i18n( "One Sample %1 Test for %2 vs %3
", test_name, m_columns[0]->name(), m_columns[1]->name());
+
+ printLine(2, i18n("Significance level is %1 ", m_significance_level), "blue");
+ printLine(4, i18n("%1 Value is %2 ", test_name, value), "green");
+ printLine(5, i18n("P Value is %1 ", p_value), "green");
+
+ if (p_value <= m_significance_level)
+ q->m_view->setResultLine(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole);
+ else
+ q->m_view->setResultLine(5, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole);
+
+ emit q->changed();
+ return;
+
+}
+
+/******************************** One Sample ***************************************/
+
+/*!
+ * One sample test (T or Z) of the selected column against \c m_population_mean.
+ *
+ * Exactly one numeric or integer column has to be selected. The data of the column
+ * ends at its first NaN cell (see findStats()). The summary table goes to
+ * \c m_stats_table, the result lines to the view, and \c changed() is emitted on
+ * every path, including the error paths.
+ */
+void HypothesisTestPrivate::performOneSampleTest(TestType test) {
+	QString test_name;
+	double value = 0;
+	int df = 0;
+	clearTestView();
+
+	if (m_columns.size() != 1) {
+		printError("Inappropriate number of columns selected");
+		emit q->changed();
+		return;
+	}
+
+	if ( !isNumericOrInteger(m_columns[0])) {
+		printError("select only columns with numbers");
+		emit q->changed();
+		return;
+	}
+
+	int n;
+	double sum, mean, std;
+	const ErrorType error_code = findStats(m_columns[0], n, sum, mean, std);
+
+	switch (error_code) {
+	case ErrorEmptyColumn:
+	// findStats() does not return ErrorUnqualSize; it is only listed to cover the enum
+	case ErrorUnqualSize: {
+		printError("column is empty");
+		emit q->changed();
+		return;
+	}
+	case NoError:
+		break;
+	}
+
+	QVariant row_major[] = {"", "N", "Sum", "Mean", "Std",
+							m_columns[0]->name(), n, sum, mean, std};
+
+	m_stats_table = getHtmlTable(2, 5, row_major);
+
+	switch (test) {
+	case TestT: {
+		test_name = "T";
+		value = (mean - m_population_mean) / (std / qSqrt(n));
+		df = n - 1;
+		printLine(6, i18n("Degree of Freedom is %1", df), "blue");
+		break;
+	}
+	case TestZ: {
+		test_name = "Z";
+		df = 0;
+		value = (mean - m_population_mean) / (std / qSqrt(n));
+		break;
+	}
+	}
+
+	// getPValue() also prints the hypotheses and may flip the sign of value,
+	// so it has to run before value is printed
+	const double p_value = getPValue(test, value, m_columns[0]->name(), i18n("%1",m_population_mean), mean - m_population_mean, std, df);
+	// NOTE(review): the string literal below is split across two lines in this listing
+	// (markup inside it may have been lost) — confirm against the original file.
+	m_currTestName = i18n( "One Sample %1 Test for %2
", test_name, m_columns[0]->name());
+
+	printLine(2, i18n("Significance level is %1", m_significance_level), "blue");
+	printLine(4, i18n("%1 Value is %2", test_name, value), "green");
+	printLine(5, i18n("P Value is %1", p_value), "green");
+
+	if (p_value <= m_significance_level)
+		q->m_view->setResultLine(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole);
+	else
+		q->m_view->setResultLine(5, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole);
+
+	emit q->changed();
+	return;
+}
+
+/*************************************One Way Anova***************************************/
+// all standard variables and formulas are taken from this wikipedia page:
+// https://en.wikipedia.org/wiki/One-way_analysis_of_variance
+// b stands for b/w groups
+// w stands for within groups
+// np is number of partition i.e., number of classes
+/*!
+ * One way ANOVA. The first selected column holds the class labels, the second one the values.
+ *
+ * Two tables are appended to \c m_stats_table: the per-group statistics and the
+ * between/within groups summary. F and p value go to the view, and \c changed() is
+ * emitted on every path, including the error paths.
+ *
+ * The overall mean is the mean over all values, i.e. the group means weighted by the
+ * group sizes, as required for the between groups sum of squares.
+ */
+void HypothesisTestPrivate::performOneWayAnova() {
+	clearTestView();
+
+	if (m_columns.size() != 2) {
+		printError("Inappropriate number of columns selected");
+		emit q->changed();
+		return;
+	}
+
+	int np = 0, total_rows = 0;
+	countPartitions(m_columns[0], np, total_rows);
+
+	// with fewer than two classes the degree of freedom between the groups would be < 1
+	if (np < 2) {
+		printError("select atleast two columns / classes");
+		emit q->changed();
+		return;
+	}
+
+	QVector<int> ni(np);
+	QVector<double> sum(np), mean(np), sd(np);
+	QVector<QString> col_names(np);
+
+	QMap<QString, int> classname_to_index;
+	QString base_col_name = "";
+
+	if (isNumericOrInteger(m_columns[0]))
+		base_col_name = m_columns[0]->name();
+
+	const ErrorType error_code = findStatsCategorical(m_columns[0], m_columns[1], ni.data(), sum.data(), mean.data(), sd.data(), classname_to_index, np, total_rows);
+
+	switch (error_code) {
+	case ErrorUnqualSize: {
+		printError( i18n("Unequal size between Column %1 and Column %2", m_columns[0]->name(), m_columns[1]->name()));
+		emit q->changed();
+		return;
+	}
+	case ErrorEmptyColumn: {
+		printError("At least one of selected column is empty");
+		emit q->changed();
+		return;
+	}
+	case NoError:
+		break;
+	}
+
+	double y_bar = 0;		// overall mean
+	double s_b = 0;			// sum of squares of (mean - overall_mean) between the groups
+	int f_b = 0;			// degree of freedom between the groups
+	double ms_b = 0;		// mean sum of squares between the groups
+	double s_w = 0;			// sum of squares of (value - mean of group) within the groups
+	int f_w = 0;			// degree of freedom within the group
+	double ms_w = 0;		// mean sum of squares within the groups
+	double f_value = 0;
+	double p_value = 0;
+
+	// overall mean: sum of all values divided by the number of all values
+	int n_total = 0;
+	for (int i = 0; i < np; i++) {
+		y_bar += sum[i];
+		n_total += ni[i];
+	}
+	y_bar = y_bar / n_total;
+
+	for (int i = 0; i < np; i++) {
+		s_b += ni[i] * gsl_pow_2( ( mean[i] - y_bar));
+		// sd[i]^2 * (ni - 1) is the sum of squared deviations of group i;
+		// for a single value sd[i] is 0 anyway
+		if (ni[i] > 1)
+			s_w += gsl_pow_2( sd[i])*(ni[i] - 1);
+		else
+			s_w += gsl_pow_2( sd[i]);
+		f_w += ni[i] - 1;
+	}
+
+	// every class consists of a single value: the within groups variance is undefined
+	if (f_w < 1) {
+		printError("Not enough data: at least one class needs more than one value");
+		emit q->changed();
+		return;
+	}
+
+	f_b = np - 1;
+	ms_b = s_b / f_b;
+
+	ms_w = s_w / f_w;
+	f_value = ms_b / ms_w;
+
+	p_value = nsl_stats_fdist_p(f_value, static_cast<size_t>(f_b), f_w);
+
+	QMapIterator<QString, int> i(classname_to_index);
+	while (i.hasNext()) {
+		i.next();
+		col_names[i.value()-1] = base_col_name + " " + i.key();
+	}
+
+	// now printing the statistics and result;
+	int row_count = np + 1, column_count = 5;
+	QVector<QVariant> group_table(row_count*column_count);
+	// header data;
+	group_table[0] = ""; group_table[1] = "Ni"; group_table[2] = "Sum"; group_table[3] = "Mean"; group_table[4] = "Std";
+
+	// table data
+	for (int row_i = 1; row_i < row_count ; row_i++) {
+		const int base = row_i*column_count;
+		group_table[base] = col_names[row_i - 1];
+		group_table[base + 1] = ni[row_i - 1];
+		group_table[base + 2] = sum[row_i - 1];
+		group_table[base + 3] = QString::number( mean[row_i - 1], 'f', 3);
+		group_table[base + 4] = QString::number( sd[row_i - 1], 'f', 3);
+	}
+
+	// NOTE(review): the two heading literals below are split across two lines in this listing
+	// (markup inside them may have been lost) — confirm against the original file.
+	m_stats_table = i18n( "Group Summary Statistics
");
+
+	m_stats_table += getHtmlTable(row_count, column_count, group_table.data());
+
+	m_stats_table += getLine("");
+	m_stats_table += getLine("");
+	m_stats_table += i18n( "Grand Summary Statistics
");
+	m_stats_table += getLine("");
+	m_stats_table += getLine(i18n("Overall Mean is %1", y_bar));
+
+	// the second table gets its own storage: its size does not depend on np
+	row_count = 4; column_count = 3;
+	QVector<QVariant> grand_table(row_count*column_count);
+
+	grand_table[0] = ""; grand_table[1] = "Between Groups"; grand_table[2] = "Within Groups";
+
+	int base_index = 0;
+	base_index = 1 * column_count; grand_table[base_index + 0] = "Sum of Squares"; grand_table[base_index + 1] = s_b; grand_table[base_index + 2] = s_w;
+	base_index = 2 * column_count; grand_table[base_index + 0] = "Degree of Freedom"; grand_table[base_index + 1] = f_b; grand_table[base_index + 2] = f_w;
+	base_index = 3 * column_count; grand_table[base_index + 0] = "Mean Square Value"; grand_table[base_index + 1] = ms_b; grand_table[base_index + 2] = ms_w;
+
+	m_stats_table += getHtmlTable(row_count, column_count, grand_table.data());
+
+	printLine(1, i18n("F Value is %1", f_value), "blue");
+	printLine(2, i18n("P Value is %1 ", p_value), "green");
+
+	if (p_value <= m_significance_level)
+		q->m_view->setResultLine(2, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole);
+	else
+		q->m_view->setResultLine(2, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole);
+
+	emit q->changed();
+	return;
+}
+
+/**************************************Levene Test****************************************/
+// TODO: Fix: Program crashes when n = np;
+// Some reference to local variables.
+
+// np = number of partitions
+// df = degree of freedom
+// total_rows = total number of rows in column
+
+// these variables are taken from: https://en.wikipedia.org/wiki/Levene%27s_test
+// yi_bar = mean of ith group;
+// Zij = |Yij - Yi_bar|
+// zi_bar = mean of Zij for group i
+// zi_bar_bar = mean for all zij
+// ni = number of elements in group i
+/*!
+ * Levene's test for the equality of variances.
+ *
+ * Data layout:
+ *  - \c categorical_variable false and first column numeric: each of the two selected
+ *    columns is one group; NaN cells are skipped and the data ends at the first row in
+ *    which all columns are NaN.
+ *  - otherwise: the first column holds the class labels, the second column the values;
+ *    the data ends at the first NaN value.
+ *
+ * Notation (see the comment block in front of this function):
+ * yi_bar = mean of group i, zij = |yij - yi_bar|, zi_bar = mean of zij in group i,
+ * zi_bar_bar = mean of all zij, ni = size of group i.
+ *
+ * The F factor (n - np) / (np - 1) is evaluated in floating point, and the test is
+ * refused when n - np < 1 because the F distribution is not defined for that case.
+ */
+void HypothesisTestPrivate::performLeveneTest(bool categorical_variable) {
+	int np = 0;		// number of groups
+	int n = 0;		// number of values over all groups
+	clearTestView();
+
+	if (m_columns.size() != 2) {
+		printError("Inappropriate number of columns selected");
+		emit q->changed();
+		return;
+	}
+
+	const bool columns_are_groups = !categorical_variable && isNumericOrInteger(m_columns[0]);
+	if (columns_are_groups)
+		np = m_columns.size();
+	else
+		countPartitions(m_columns[0], np, n);
+
+	if (np < 2) {
+		printError("select atleast two columns / classes");
+		emit q->changed();
+		return;
+	}
+
+	QVector<double> yi_bar(np, 0.);
+	QVector<double> zi_bar(np, 0.);
+	QVector<double> ni(np, 0.);
+	QVector<QString> col_names(np);
+	double zi_bar_bar = 0;
+	double denominator_value = 0;
+
+	if (columns_are_groups) {
+		int total_rows = m_columns[0]->rowCount();
+
+		// pass 1: group sums and counts
+		for (int j = 0; j < total_rows; j++) {
+			int number_nan_cols = 0;
+			for (int i = 0; i < np; i++) {
+				const double value = m_columns[i]->valueAt(j);
+				if (std::isnan(value)) {
+					number_nan_cols++;
+					continue;
+				}
+				yi_bar[i] += value;
+				ni[i]++;
+				n++;
+			}
+			if (number_nan_cols == np) {
+				total_rows = j;
+				break;
+			}
+		}
+
+		for (int i = 0; i < np; i++) {
+			if (ni[i] > 0)
+				yi_bar[i] = yi_bar[i] / ni[i];
+		}
+
+		// pass 2: sums of the absolute deviations from the group mean
+		for (int j = 0; j < total_rows; j++) {
+			for (int i = 0; i < np; i++) {
+				const double value = m_columns[i]->valueAt(j);
+				if (!(std::isnan(value)))
+					zi_bar[i] += qAbs(value - yi_bar[i]);
+			}
+		}
+
+		for (int i = 0; i < np; i++) {
+			zi_bar_bar += zi_bar[i];
+			if (ni[i] > 0)
+				zi_bar[i] = zi_bar[i] / ni[i];
+		}
+
+		// pass 3: squared deviations of zij from the group mean zi_bar
+		for (int j = 0; j < total_rows; j++) {
+			for (int i = 0; i < np; i++) {
+				const double value = m_columns[i]->valueAt(j);
+				if (!(std::isnan(value))) {
+					const double zij = qAbs(value - yi_bar[i]);
+					denominator_value += gsl_pow_2( (zij - zi_bar[i]));
+				}
+			}
+		}
+
+		for (int i = 0; i < np; i++)
+			col_names[i] = m_columns[i]->name();
+	} else {
+		QMap<QString, int> classname_to_index;
+
+		// the labels are read as text; the original mode is restored at the end of this branch
+		const AbstractColumn::ColumnMode original_col_mode = m_columns[0]->columnMode();
+		m_columns[0]->setColumnMode(AbstractColumn::Text);
+
+		int partition_number = 1;
+
+		// pass 1: group sums and counts; the data ends at the first NaN value
+		for (int j = 0; j < n; j++) {
+			const QString name = m_columns[0]->textAt(j);
+			const double value = m_columns[1]->valueAt(j);
+			if (std::isnan(value)) {
+				n = j;
+				break;
+			}
+
+			if (classname_to_index[name] == 0) {
+				classname_to_index[name] = partition_number;
+				partition_number++;
+			}
+
+			const int class_index = classname_to_index[name]-1;
+			ni[class_index]++;
+			yi_bar[class_index] += value;
+		}
+
+		for (int i = 0; i < np; i++) {
+			if (ni[i] > 0)
+				yi_bar[i] = yi_bar[i] / ni[i];
+		}
+
+		// pass 2: sums of the absolute deviations from the group mean
+		for (int j = 0; j < n; j++) {
+			const QString name = m_columns[0]->textAt(j);
+			const double value = m_columns[1]->valueAt(j);
+			const int class_index = classname_to_index[name] - 1;
+			zi_bar[class_index] += qAbs(value - yi_bar[class_index]);
+		}
+
+		for (int i = 0; i < np; i++) {
+			zi_bar_bar += zi_bar[i];
+			// a class can be empty if the data ended early at a NaN value
+			if (ni[i] > 0)
+				zi_bar[i] = zi_bar[i] / ni[i];
+		}
+
+		// pass 3: squared deviations of zij from the group mean zi_bar
+		for (int j = 0; j < n; j++) {
+			const QString name = m_columns[0]->textAt(j);
+			const double value = m_columns[1]->valueAt(j);
+			const int class_index = classname_to_index[name] - 1;
+			const double zij = qAbs(value - yi_bar[class_index]);
+			denominator_value += gsl_pow_2( (zij - zi_bar[class_index]));
+		}
+
+		QMapIterator<QString, int> i(classname_to_index);
+		while (i.hasNext()) {
+			i.next();
+			col_names[i.value()-1] = m_columns[0]->name() + " " + i.key();
+		}
+
+		m_columns[0]->setColumnMode(original_col_mode);
+	}
+
+	const int df = n - np;
+	if (df < 1) {
+		printError("Not enough data: number of values must exceed number of classes");
+		emit q->changed();
+		return;
+	}
+
+	zi_bar_bar = zi_bar_bar / n;
+
+	double numerator_value = 0;
+	for (int i = 0; i < np; i++)
+		numerator_value += ni[i]*gsl_pow_2( (zi_bar[i]-zi_bar_bar));
+
+	// floating point division on purpose: (n - np) / (np - 1) is not an integer in general
+	const double f_value = (static_cast<double>(df) / (np - 1)) * (numerator_value / denominator_value);
+
+	// now making the stats table.
+	const int row_count = np+1;
+	const int column_count = 4;
+
+	QVector<QVariant> row_major(row_count*column_count);
+	// header data;
+	row_major[0] = ""; row_major[1] = "Ni"; row_major[2] = "Yi_bar"; row_major[3] = "Zi_bar";
+
+	// table data
+	for (int row_i = 1; row_i < row_count; row_i++) {
+		const int base = row_i*column_count;
+		row_major[base] = col_names[row_i-1];
+		row_major[base + 1] = ni[row_i-1];
+		row_major[base + 2] = yi_bar[row_i-1];
+		row_major[base + 3] = zi_bar[row_i-1];
+	}
+
+	m_stats_table = getHtmlTable(row_count, column_count, row_major.data());
+
+	const double p_value = nsl_stats_fdist_p(f_value, static_cast<size_t>(np-1), df);
+
+	printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue");
+	printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue");
+	printLine(2, i18n("Significance level is %1", m_significance_level), "blue");
+	printLine(4, i18n("F Value is %1 ", f_value), "green");
+	printLine(5, i18n("P Value is %1 ", p_value), "green");
+	printLine(6, i18n("Degree of Freedom is %1", df), "green");
+
+	if (p_value <= m_significance_level) {
+		q->m_view->setResultLine(5, i18n("We can safely reject Null Hypothesis for significance level %1", m_significance_level), Qt::ToolTipRole);
+		printLine(8, "Requirement for homogeneity is not met", "red");
+	} else {
+		q->m_view->setResultLine(5, i18n("There is a plausibility for Null Hypothesis to be true"), Qt::ToolTipRole);
+		printLine(8, "Requirement for homogeneity is met", "green");
+	}
+	emit q->changed();
+	return;
+}
+
+/***************************************Helper Functions*************************************/
+
+/*!
+ * Returns true if the mode of \c column is AbstractColumn::Numeric or AbstractColumn::Integer.
+ */
+bool HypothesisTestPrivate::isNumericOrInteger(Column* column) {
+	if (column->columnMode() == AbstractColumn::Numeric)
+		return true;
+
+	return column->columnMode() == AbstractColumn::Integer;
+}
+
+/*!
+ * Calculates count, sum, mean and standard deviation of \c column.
+ *
+ * The data ends at the first NaN cell or at the end of the column. The sum of squared
+ * deviations is divided by (count - 1) when there is more than one value.
+ *
+ * \return ErrorEmptyColumn if there is no value in front of the first NaN cell
+ *         (mean and std are 0 then), NoError otherwise.
+ */
+HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStats(const Column* column, int& count, double& sum, double& mean, double& std) {
+	sum = mean = std = 0;
+
+	// count the leading values and add them up on the way
+	const int row_count = column->rowCount();
+	count = 0;
+	while (count < row_count) {
+		const double cell = column->valueAt(count);
+		if (std::isnan(cell))
+			break;
+		sum += cell;
+		++count;
+	}
+
+	if (count < 1)
+		return HypothesisTestPrivate::ErrorEmptyColumn;
+
+	mean = sum / count;
+
+	double squared_deviations = 0;
+	for (int row = 0; row < count; ++row) {
+		const double deviation = column->valueAt(row) - mean;
+		squared_deviations += deviation * deviation;
+	}
+
+	// a single value keeps the undivided sum, which is 0
+	std = qSqrt(count > 1 ? squared_deviations / (count-1) : squared_deviations);
+
+	return HypothesisTestPrivate::NoError;
+}
+
+/*!
+ * Calculates count, sum, mean and standard deviation of the row-wise differences
+ * column1 - column2.
+ *
+ * Only the rows present in both columns are considered. The data ends at the first row
+ * in which both cells are NaN.
+ *
+ * \return ErrorUnqualSize if exactly one of the two cells of a row is NaN,
+ *         ErrorEmptyColumn if there is no complete row, NoError otherwise.
+ */
+HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStatsPaired(const Column* column1, const Column* column2, int& count, double& sum, double& mean, double& std) {
+	sum = mean = std = 0;
+
+	const int common_rows = qMin(column1->rowCount(), column2->rowCount());
+	count = common_rows;
+
+	for (int row = 0; row < common_rows; ++row) {
+		const double first = column1->valueAt(row);
+		const double second = column2->valueAt(row);
+		const bool first_missing = std::isnan(first);
+		const bool second_missing = std::isnan(second);
+
+		// one column ends before the other one
+		if (first_missing != second_missing)
+			return HypothesisTestPrivate::ErrorUnqualSize;
+
+		// both columns end here
+		if (first_missing) {
+			count = row;
+			break;
+		}
+
+		sum += first - second;
+	}
+
+	if (count < 1)
+		return HypothesisTestPrivate::ErrorEmptyColumn;
+
+	mean = sum / count;
+
+	double squared_deviations = 0;
+	for (int row = 0; row < count; ++row) {
+		const double difference = column1->valueAt(row) - column2->valueAt(row);
+		const double deviation = difference - mean;
+		squared_deviations += deviation * deviation;
+	}
+
+	// a single row keeps the undivided sum, which is 0
+	std = qSqrt(count > 1 ? squared_deviations / (count-1) : squared_deviations);
+	return HypothesisTestPrivate::NoError;
+}
+
+// Counts the distinct cell texts (classes) of column.
+// Out parameters: np receives the number of distinct texts, total_rows the number of rows
+// in front of the first empty cell (or the row count of the column if there is none).
+// The column is switched to text mode for reading and back to its original mode afterwards.
+void HypothesisTestPrivate::countPartitions(Column* column, int& np, int& total_rows) {
+ total_rows = column->rowCount();
+ np = 0;
+ QString cell_value;
+ // NOTE(review): the template arguments of QMap seem to have been lost in this listing
+ // (presumably QMap<QString, bool>, judging from the usage below) — confirm.
+ QMap discovered_categorical_var;
+
+ AbstractColumn::ColumnMode original_col_mode = column->columnMode();
+ column->setColumnMode(AbstractColumn::Text);
+
+ for (int i = 0; i < total_rows; i++) {
+ cell_value = column->textAt(i);
+
+ // the first empty cell marks the end of the data
+ if (cell_value.isEmpty()) {
+ total_rows = i;
+ break;
+ }
+
+ // text was seen before, not a new class
+ if (discovered_categorical_var[cell_value])
+ continue;
+
+ discovered_categorical_var[cell_value] = true;
+ np++;
+ }
+ column->setColumnMode(original_col_mode);
+}
+
+/*!
+ * Calculates count, sum, mean and standard deviation per class.
+ *
+ * \param column1 class labels; read as text, the original column mode is restored before returning
+ * \param column2 values
+ * \param n, sum, mean, std output arrays with at least \c np elements each
+ * \param col_name maps every label to its 1-based class index in the order of first
+ *        appearance; it is expected to be empty on entry
+ * \param np number of distinct labels, expected to be the value computed by
+ *        countPartitions() for \c column1 (the class index is not checked against it)
+ * \param total_rows number of rows to read
+ *
+ * \return ErrorEmptyColumn if there is nothing to read, ErrorUnqualSize if a value in
+ *         \c column2 is NaN within the first \c total_rows rows, NoError otherwise.
+ */
+HypothesisTestPrivate::ErrorType HypothesisTestPrivate::findStatsCategorical(Column* column1, Column* column2, int n[], double sum[], double mean[], double std[], QMap<QString, int>& col_name, const int& np, const int& total_rows) {
+	for (int i = 0; i < np; i++) {
+		n[i] = 0;
+		sum[i] = 0;
+		mean[i] = 0;
+		std[i] = 0;
+	}
+
+	if (np < 1 || total_rows < 1)
+		return HypothesisTestPrivate::ErrorEmptyColumn;
+
+	const AbstractColumn::ColumnMode original_col_mode = column1->columnMode();
+	column1->setColumnMode(AbstractColumn::Text);
+
+	// pass 1: assign the class indices and collect counts and sums
+	int partition_number = 1;
+	for (int i = 0; i < total_rows; i++) {
+		const QString name = column1->textAt(i);
+		const double value = column2->valueAt(i);
+
+		if (std::isnan(value)) {
+			column1->setColumnMode(original_col_mode);
+			return HypothesisTestPrivate::ErrorUnqualSize;
+		}
+
+		// operator[] inserts 0 for an unknown label, 0 therefore means "new class"
+		int& class_number = col_name[name];
+		if (class_number == 0) {
+			class_number = partition_number;
+			partition_number++;
+		}
+
+		n[class_number-1]++;
+		sum[class_number-1] += value;
+	}
+
+	for (int i = 0; i < np; i++) {
+		if (n[i] > 0)
+			mean[i] = sum[i] / n[i];
+	}
+
+	// pass 2: sums of the squared deviations from the class mean
+	for (int i = 0; i < total_rows; i++) {
+		const QString name = column1->textAt(i);
+		const double value = column2->valueAt(i);
+		const int class_index = col_name.value(name) - 1;
+
+		std[class_index] += gsl_pow_2( (value - mean[class_index]));
+	}
+
+	for (int i = 0; i < np; i++) {
+		if (n[i] > 1)
+			std[i] = std[i] / (n[i] - 1);
+		std[i] = qSqrt(std[i]);
+	}
+
+	column1->setColumnMode(original_col_mode);
+
+	return HypothesisTestPrivate::NoError;
+}
+
+
+//TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol;
+/*!
+	Computes the p-value of the test statistic \c value for the currently selected \c tail_type and
+	prints the null and alternate hypothesis on result lines 0 and 1.
+
+	\param test TestT uses the Student t CDF with \c df degrees of freedom,
+	       TestZ uses the Gaussian CDF of (\c value - \c mean) with standard deviation \c sp
+	\param value test statistic; in/out: it is negated for TailPositive
+	\param col1_name, col2_name names shown in the hypothesis lines
+	\param mean, sp only used for TestZ
+	\param df only used for TestT
+	\return the p-value, capped at 1
+
+	The two-tailed p-value is twice the lower-tail probability of -|statistic|; doubling the CDF of a
+	positive statistic would exceed 1 and always be capped.
+*/
+double HypothesisTestPrivate::getPValue(const HypothesisTestPrivate::TestType& test, double& value, const QString& col1_name, const QString& col2_name, const double mean, const double sp, const int df) {
+	double p_value = 0;
+	switch (test) {
+	case TestT: {
+		switch (tail_type) {
+		case HypothesisTest::TailNegative:
+			p_value = gsl_cdf_tdist_P(value, df);
+			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("≥"), col2_name), "blue");
+			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("⋖"), col2_name), "blue");
+			break;
+		case HypothesisTest::TailPositive:
+			// upper tail: P(T > value) == P(T <= -value) by symmetry
+			value *= -1;
+			p_value = gsl_cdf_tdist_P(value, df);
+			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("≤"), col2_name), "blue");
+			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING(">"), col2_name), "blue");
+			break;
+		case HypothesisTest::TailTwo:
+			p_value = 2. * gsl_cdf_tdist_P(-std::fabs(value), df);
+
+			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("="), col2_name), "blue");
+			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1_name, UTF8_QSTRING("≠"), col2_name), "blue");
+			break;
+		}
+		break;
+	}
+	case TestZ: {
+		switch (tail_type) {
+		case HypothesisTest::TailNegative:
+			p_value = gsl_cdf_gaussian_P(value - mean, sp);
+			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("≥"), col2_name), "blue");
+			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("⋖"), col2_name), "blue");
+			break;
+		case HypothesisTest::TailPositive:
+			// a Z test needs the Gaussian CDF like the other two tails, not a t-distribution helper
+			// NOTE(review): after the negation the offset is (-value - mean); this is the exact upper
+			// tail only for mean == 0 - confirm what callers pass as mean
+			value *= -1;
+			p_value = gsl_cdf_gaussian_P(value - mean, sp);
+			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("≤"), col2_name), "blue");
+			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING(">"), col2_name), "blue");
+			break;
+		case HypothesisTest::TailTwo:
+			p_value = 2. * gsl_cdf_gaussian_P(-std::fabs(value - mean), sp);
+			printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("="), col2_name), "blue");
+			printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1_name, UTF8_QSTRING("≠"), col2_name), "blue");
+			break;
+		}
+		break;
+	}
+	}
+
+	// safeguard against rounding slightly above 1
+	if (p_value > 1)
+		return 1;
+	return p_value;
+}
+
+// Builds the table text for the result view from a flat, row-major array of cells.
+// row, column: table dimensions; cell (i, j) is read from row_major[i*column + j], so row_major
+// must provide row*column entries (not checked here).
+// Returns an empty QString when row or column is smaller than 1.
+// NOTE(review): the string literals in this function appear to have lost their markup
+// (the format strings only show a %2 placeholder although two arguments are passed) - verify against the real file.
+QString HypothesisTestPrivate::getHtmlTable(int row, int column, QVariant* row_major) {
+ if (row < 1 || column < 1)
+ return QString();
+
+ QString table = "";
+ table = ""
+ ""
+ " ";
+
+ // bg is passed as first argument of every cell format; pky drives the switch between "tg-0pky" and "tg-btxf"
+ QString bg = "tg-0pky";
+ bool pky = true;
+
+ QString element;
+ table += "
";
+ // first row: cells 0 .. column-1
+ for (int j = 0; j < column; j++) {
+ element = row_major[j].toString();
+ table += i18n(" %2 | ", bg, element);
+ }
+ table += "
";
+
+ // choose bg for the next row, then flip the toggle
+ if (pky)
+ bg = "tg-0pky";
+ else
+ bg = "tg-btxf";
+ pky = !pky;
+
+ // remaining rows
+ for (int i = 1; i < row; i++) {
+ table += " ";
+
+ // the first cell of the row is emitted separately from the others
+ // NOTE: the 'element' locals declared in this loop shadow the one declared before the first-row loop
+ QString element = row_major[i*column].toString();
+ table += i18n(" %2 | ", bg, element);
+ for (int j = 1; j < column; j++) {
+ QString element = row_major[i*column+j].toString();
+ table += i18n(" %2 | ", bg, element);
+ }
+
+ table += "
";
+ // choose bg for the next row, then flip the toggle
+ if (pky)
+ bg = "tg-0pky";
+ else
+ bg = "tg-btxf";
+ pky = !pky;
+ }
+ table += "
";
+
+ return table;
+}
+
+// Formats one result line: passes color and msg (in that order) to the i18n format string below
+// and returns the resulting text.
+// NOTE(review): the format string appears to have lost its markup (only %2 is visible) - verify against the real file.
+QString HypothesisTestPrivate::getLine(const QString& msg, const QString& color) {
+ return i18n("%2
", color, msg);
+}
+
+/*!
+	Shows \c msg, formatted by getLine() with \c color, as result line \c index of the view.
+	Does nothing when the owning HypothesisTest has no view, since an aspect must not depend on
+	the existence of its view.
+	NOTE(review): assumes q->m_view is nullptr until HypothesisTest::view() creates it - confirm its initialization.
+*/
+void HypothesisTestPrivate::printLine(const int& index, const QString& msg, const QString& color) {
+	if (!q->m_view)
+		return;
+
+	q->m_view->setResultLine(index, getLine(msg, color));
+}
+
+/*!
+	Prints \c error_msg in red on result line 0 and then emits the changed() signal of \c q.
+*/
+void HypothesisTestPrivate::printError(const QString& error_msg) {
+	const int error_line = 0;
+	printLine(error_line, error_msg, "red");
+
+	emit q->changed();
+}
+
+
+/*!
+	Resets the cached statistics table and clears the result shown in the view.
+	The view is only touched when it exists, since an aspect must not depend on the existence of its view.
+	NOTE(review): assumes q->m_view is nullptr until HypothesisTest::view() creates it - confirm its initialization.
+*/
+void HypothesisTestPrivate::clearTestView() {
+	m_stats_table = "";
+
+	if (q->m_view)
+		q->m_view->clearResult();
+}
+
+/**********************************************************************************
+ * virtual functions implementations
+ * ********************************************************************************/
+
+/*!
+	Writes this object as a "hypothesisTest" XML element containing the basic attributes
+	and the comment element. Test-specific state is not written yet.
+ */
+void HypothesisTest::save(QXmlStreamWriter* writer) const {
+	QXmlStreamWriter& xml = *writer;
+
+	xml.writeStartElement("hypothesisTest");
+	writeBasicAttributes(writer);
+	writeCommentElement(writer);
+	//TODO: write the test-specific settings
+	xml.writeEndElement();
+}
+
+/*!
+	Reads this object from XML. Only the basic attributes are read so far.
+	\return false if the basic attributes could not be read or the reader reports an error
+*/
+bool HypothesisTest::load(XmlStreamReader* reader, bool preview) {
+	Q_UNUSED(preview);
+
+	const bool basicsRead = readBasicAttributes(reader);
+	//TODO: read the test-specific settings
+
+	// the reader state is only consulted when the basic attributes were read successfully
+	return basicsRead && !reader->hasError();
+}
+
+/*!
+	Returns the spreadsheet currently stored as data source in the private object (may be nullptr).
+*/
+Spreadsheet* HypothesisTest::dataSourceSpreadsheet() const {
+	Spreadsheet* const source = d->dataSourceSpreadsheet;
+	return source;
+}
+
+
+/*!
+	Placeholder: nothing is exported, the call always reports success.
+*/
+bool HypothesisTest::exportView() const {
+	const bool done = true;
+	return done;
+}
+
+/*!
+	Placeholder: nothing is printed, the call always reports success.
+*/
+bool HypothesisTest::printView() {
+	const bool done = true;
+	return done;
+}
+
+/*!
+	Placeholder: no preview is shown, the call always reports success.
+*/
+bool HypothesisTest::printPreview() const {
+	const bool done = true;
+	return done;
+}
+
+/*!
+	Returns the primary view of this object, creating it on first use.
+	The view may be requested several times during the life time of an aspect, or never at all;
+	aspects must not depend on the existence of a view for their operation.
+*/
+QWidget* HypothesisTest::view() const {
+	// already created by an earlier call
+	if (m_partView)
+		return m_partView;
+
+	m_view = new HypothesisTestView(const_cast(this));
+	m_partView = m_view;
+	return m_partView;
+}
+
+/*!
+	Creates the context menu for this object by delegating to AbstractPart.
+	The caller takes ownership of the returned menu.
+*/
+QMenu* HypothesisTest::createContextMenu() {
+	// the project context-menu request is currently disabled:
+	// Q_ASSERT(menu);
+	// emit requestProjectContextMenu(menu);
+	return AbstractPart::createContextMenu();
+}
diff --git a/hypothesisTest/HypothesisTestPrivate.h b/hypothesisTest/HypothesisTestPrivate.h
new file mode 100644
--- /dev/null
+++ b/hypothesisTest/HypothesisTestPrivate.h
@@ -0,0 +1,87 @@
+/***************************************************************************
+ File : HypothesisTestPrivate.h
+ Project : LabPlot
+ Description : Private members of Hypothesis Test
+ --------------------------------------------------------------------
+ Copyright : (C) 2019 Devanshu Agarwal(agarwaldevanshu8@gmail.com)
+
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the Free Software *
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
+ * Boston, MA 02110-1301 USA *
+ * *
+ ***************************************************************************/
+
+#ifndef HYPOTHESISTESTPRIVATE_H
+#define HYPOTHESISTESTPRIVATE_H
+
+#include
+
+class QStandardItemModel;
+
+/*!
+	Private implementation of HypothesisTest: holds the data source, the selected columns and
+	the test parameters, and performs the tests. \c q points back to the public object.
+*/
+class HypothesisTestPrivate {
+public:
+
+	explicit HypothesisTestPrivate(HypothesisTest*);
+	virtual ~HypothesisTestPrivate();
+
+	enum TestType {TestT, TestZ};
+	enum ErrorType {ErrorUnqualSize, ErrorEmptyColumn, NoError};
+
+	QString name() const;
+	void setDataSourceSpreadsheet(Spreadsheet* spreadsheet);
+	void setColumns(QStringList cols);
+	void performTwoSampleIndependentTest(TestType test, bool categorical_variable = false, bool equal_variance = true);
+	void performTwoSamplePairedTest(TestType test);
+	void performOneSampleTest(TestType test);
+	void performOneWayAnova();
+
+	void performLeveneTest(bool categorical_variable);
+
+	HypothesisTest* const q;
+	HypothesisTest::DataSourceType dataSourceType{HypothesisTest::DataSourceSpreadsheet};
+	Spreadsheet* dataSourceSpreadsheet{nullptr};
+	QVector m_columns;
+	QStringList all_columns;
+
+	bool m_dbCreated{false};
+	int m_rowCount{0};
+	int m_columnCount{0};
+	QString m_currTestName{"Result Table"};
+	// the three parameters below get defaults so they are never read uninitialized;
+	// presumably they are overwritten with the user's choice before a test is run
+	double m_population_mean{0.0};
+	double m_significance_level{0.05};
+	QString m_stats_table;
+	HypothesisTest::TailType tail_type{HypothesisTest::TailPositive};
+
+private:
+	bool isNumericOrInteger(Column* column);
+
+	// helpers collecting the sample statistics needed by the tests
+	void countPartitions(Column* column, int& np, int& total_rows);
+	ErrorType findStats(const Column* column,int& count, double& sum, double& mean, double& std);
+	ErrorType findStatsPaired(const Column* column1, const Column* column2, int& count, double& sum, double& mean, double& std);
+	ErrorType findStatsCategorical(Column* column1, Column* column2, int n[], double sum[], double mean[], double std[], QMap& col_name, const int& np, const int& total_rows);
+
+	// value is in/out: getPValue() negates it for TailPositive
+	double getPValue(const TestType& test, double& value, const QString& col1_name, const QString& col2_name, const double mean, const double sp, const int df);
+	QString getHtmlTable(int row, int column, QVariant* row_major);
+
+	// output helpers writing to the view of q
+	QString getLine(const QString& msg, const QString& color = "black");
+	void printLine(const int& index, const QString& msg, const QString& color = "black");
+	void printError(const QString& error_msg);
+	void clearTestView();
+};
+
+#endif