Differential D22496 Diff 61870 src/backend/hypothesisTest/HypothesisTest.cpp

Changeset View

Standalone View

src/backend/hypothesisTest/HypothesisTest.cpp

Context not available.
47	#include <gsl/gsl_cdf.h>	47		#include <gsl/gsl_cdf.h>
48	#include <gsl/gsl_math.h>	48		#include <gsl/gsl_math.h>
49		49
50	#include <math.h>
51
52	extern "C" {	50		extern "C" {
53	#include "backend/nsl/nsl_stats.h"	51		#include "backend/nsl/nsl_stats.h"
54	}	52		}
Context not available.
106		104
107	void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) {	105		void HypothesisTest::performTest(Test test, bool categoricalVariable, bool equalVariance) {
108	d->tailType = test.tail;	106		d->tailType = test.tail;
		107		d->pValue.clear();
		108		d->statisticValue.clear();
		109		d->statsTable = "";
		110		for (int i = 0; i < 10; i++)
		111		d->resultLine[i]->clear();
		112
109	switch (test.subtype) {	113		switch (test.subtype) {
110	case HypothesisTest::Test::SubType::TwoSampleIndependent: {	114		case HypothesisTest::Test::SubType::TwoSampleIndependent: {
111	d->currTestName = "<h2>" + i18n("Two Sample Independent Test") + "</h2>";	115		d->currTestName = "<h2>" + i18n("Two Sample Independent Test") + "</h2>";
Context not available.
141	void HypothesisTest::performLeveneTest(bool categoricalVariable) {	145		void HypothesisTest::performLeveneTest(bool categoricalVariable) {
142	d->currTestName = "<h2>" + i18n("Levene Test for Equality of Variance") + "</h2>";	146		d->currTestName = "<h2>" + i18n("Levene Test for Equality of Variance") + "</h2>";
143	d->performLeveneTest(categoricalVariable);	147		d->performLeveneTest(categoricalVariable);
144
145	emit changed();	148		emit changed();
146	}	149		}
147		150
148	double HypothesisTest::statisticValue() {	151		QList<double> HypothesisTest::statisticValue() {
149	return d->statisticValue;	152		return d->statisticValue;
150	}	153		}
151		154
152	double HypothesisTest::pValue() {	155		QList<double> HypothesisTest::pValue() {
153	return d->pValue;	156		return d->pValue;
154	}	157		}
155		158
Context not available.
203	/************************Two Sample Independent ***********************************/	206		/************************Two Sample Independent ***********************************/
204		207
205	void HypothesisTestPrivate::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) {	208		void HypothesisTestPrivate::performTwoSampleIndependentTest(HypothesisTest::Test::Type test, bool categoricalVariable, bool equalVariance) {
206	clearTestView();
207
208	if (columns.size() != 2) {	209		if (columns.size() != 2) {
209	printError("Inappropriate number of columns selected");	210		printError("Inappropriate number of columns selected");
210	return;	211		return;
Context not available.
300		301
301	sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) +	302		sp = qSqrt(((n[0]-1) * gsl_pow_2(std[0]) +
302	(n[1]-1) * gsl_pow_2(std[1]) ) / df );	303		(n[1]-1) * gsl_pow_2(std[1]) ) / df );
303	statisticValue = (mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1]));	304		statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt(1.0/n[0] + 1.0/n[1])));
			sgerlach [Unsubmitted, Done]: I would suggest using sqrt() from <cmath> instead of qSqrt(). What do you think?
			devanshuagarwal (Author) [Unsubmitted, Done]: Yes, you are right.
304	printLine(9, "<b>Assumption:</b> Equal Variance b/w both population means");	305		printLine(9, "<b>Assumption:</b> Equal Variance b/w both population means");
305	} else {	306		} else {
306	double temp_val;	307		double temp_val;
Context not available.
309	(gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1)));	310		(gsl_pow_2( (gsl_pow_2(std[1]) / n[1]) ) / (n[1]-1)));
310	df = qRound(temp_val);	311		df = qRound(temp_val);
311		312
312	statisticValue = (mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) +	313		statisticValue.append((mean[0] - mean[1]) / (qSqrt( (gsl_pow_2(std[0])/n[0]) +
313	(gsl_pow_2(std[1])/n[1])));	314		(gsl_pow_2(std[1])/n[1]))));
314	printLine(9, "<b>Assumption:</b> UnEqual Variance b/w both population means");	315		printLine(9, "<b>Assumption:</b> UnEqual Variance b/w both population means");
315	}	316		}
316		317
Context not available.
320	case HypothesisTest::Test::Type::ZTest: {	321		case HypothesisTest::Test::Type::ZTest: {
321	testName = "Z";	322		testName = "Z";
322	sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df);	323		sp = qSqrt( ((n[0]-1) * gsl_pow_2(std[0]) + (n[1]-1) * gsl_pow_2(std[1])) / df);
323	statisticValue = (mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1]));	324		statisticValue.append((mean[0] - mean[1]) / (sp * qSqrt( 1.0 / n[0] + 1.0 / n[1])));
324	pValue = gsl_cdf_gaussian_P(statisticValue, sp);	325		// pValue.append(gsl_cdf_gaussian_P(statisticValue, sp));
325	break;	326		break;
326	}	327		}
327	case HypothesisTest::Test::Type::Anova:	328		case HypothesisTest::Test::Type::Anova:
Context not available.
330	}	331		}
331		332
332	currTestName = "<h2>" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "</h2>";	333		currTestName = "<h2>" + i18n("Two Sample Independent %1 Test for %2 vs %3", testName, col1Name, col2Name) + "</h2>";
333	pValue = getPValue(test, statisticValue, col1Name, col2Name, (mean[0] - mean[1]), sp, df);	334		pValue.append(getPValue(test, statisticValue[0], col1Name, col2Name, (mean[0] - mean[1]), sp, df));
334		335
335	printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue");	336		printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue");
336		337
337	printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green");	338		printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green");
338	printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName));	339		printTooltip(4, i18n("More is the |%1-value|, more safely we can reject the null hypothesis", testName));
339		340
340	printLine(5, i18n("P Value is %1 ", pValue), "green");	341		printLine(5, i18n("P Value is %1 ", pValue[0]), "green");
341		342
342	printLine(6, i18n("Degree of Freedom is %1", df), "green");	343		printLine(6, i18n("Degree of Freedom is %1", df), "green");
343	printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate"));	344		printTooltip(6, i18n("Number of independent Pieces of information that went into calculating the estimate"));
344		345
345	if (pValue <= significanceLevel)	346		if (pValue[0] <= significanceLevel)
346	printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(significanceLevel)));	347		printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", round(significanceLevel)));
347	else	348		else
348	printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));	349		printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
Context not available.
352	/******************************Two Sample Paired *************************************/	353		/******************************Two Sample Paired *************************************/
353		354
354	void HypothesisTestPrivate::performTwoSamplePairedTest(HypothesisTest::Test::Type test) {	355		void HypothesisTestPrivate::performTwoSamplePairedTest(HypothesisTest::Test::Type test) {
355	clearTestView();
356
357	if (columns.size() != 2) {	356		if (columns.size() != 2) {
358	printError("Inappropriate number of columns selected");	357		printError("Inappropriate number of columns selected");
359		358
Context not available.
403		402
404	switch (test) {	403		switch (test) {
405	case HypothesisTest::Test::Type::TTest: {	404		case HypothesisTest::Test::Type::TTest: {
406	statisticValue = mean / (std / qSqrt(n));	405		statisticValue[0] = mean / (std / qSqrt(n));
407	df = n - 1;	406		df = n - 1;
408	testName = "T";	407		testName = "T";
409	printLine(6, i18n("Degree of Freedom is %1</p", df), "green");	408		printLine(6, i18n("Degree of Freedom is %1</p", df), "green");
Context not available.
411	}	410		}
412	case HypothesisTest::Test::Type::ZTest: {	411		case HypothesisTest::Test::Type::ZTest: {
413	testName = "Z";	412		testName = "Z";
414	statisticValue = mean / (std / qSqrt(n));	413		statisticValue[0] = mean / (std / qSqrt(n));
415	df = n - 1;	414		df = n - 1;
416	break;	415		break;
417	}	416		}
Context not available.
422		421
423	}	422		}
424		423
425	pValue = getPValue(test, statisticValue, columns[0]->name(), i18n("%1", populationMean), mean, std, df);	424		pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1", populationMean), mean, std, df));
426	currTestName = "<h2>" + i18n("One Sample %1 Test for %2 vs %3", testName, columns[0]->name(), columns[1]->name()) + "</h2>";	425		currTestName = "<h2>" + i18n("One Sample %1 Test for %2 vs %3", testName, columns[0]->name(), columns[1]->name()) + "</h2>";
427		426
428	printLine(2, i18n("Significance level is %1 ", round(significanceLevel)), "blue");	427		printLine(2, i18n("Significance level is %1 ", round(significanceLevel)), "blue");
429	printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue)), "green");	428		printLine(4, i18n("%1 Value is %2 ", testName, round(statisticValue[0])), "green");
430	printLine(5, i18n("P Value is %1 ", pValue), "green");	429		printLine(5, i18n("P Value is %1 ", pValue[0]), "green");
431		430
432	if (pValue <= significanceLevel)	431		if (pValue[0] <= significanceLevel)
433	printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));	432		printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));
434	else	433		else
435	printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));	434		printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
Context not available.
440	/****************************** One Sample *************************************/	439		/****************************** One Sample *************************************/
441		440
442	void HypothesisTestPrivate::performOneSampleTest(HypothesisTest::Test::Type test) {	441		void HypothesisTestPrivate::performOneSampleTest(HypothesisTest::Test::Type test) {
443	clearTestView();
444
445	if (columns.size() != 1) {	442		if (columns.size() != 1) {
446	printError("Inappropriate number of columns selected");	443		printError("Inappropriate number of columns selected");
447		444
Context not available.
488	switch (test) {	485		switch (test) {
489	case HypothesisTest::Test::Type::TTest: {	486		case HypothesisTest::Test::Type::TTest: {
490	testName = "T";	487		testName = "T";
491	statisticValue = (mean - populationMean) / (std / qSqrt(n));	488		statisticValue.append((mean - populationMean) / (std / qSqrt(n)));
492	df = n - 1;	489		df = n - 1;
493	printLine(6, i18n("Degree of Freedom is %1", df), "blue");	490		printLine(6, i18n("Degree of Freedom is %1", df), "blue");
494	break;	491		break;
Context not available.
496	case HypothesisTest::Test::Type::ZTest: {	493		case HypothesisTest::Test::Type::ZTest: {
497	testName = "Z";	494		testName = "Z";
498	df = 0;	495		df = 0;
499	statisticValue = (mean - populationMean) / (std / qSqrt(n));	496		statisticValue.append((mean - populationMean) / (std / qSqrt(n)));
500	break;	497		break;
501	}	498		}
502	case HypothesisTest::Test::Type::Anova:	499		case HypothesisTest::Test::Type::Anova:
Context not available.
504	break;	501		break;
505	}	502		}
506		503
507	pValue = getPValue(test, statisticValue, columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df);	504		pValue.append(getPValue(test, statisticValue[0], columns[0]->name(), i18n("%1",populationMean), mean - populationMean, std, df));
508	currTestName = "<h2>" + i18n("One Sample %1 Test for %2", testName, columns[0]->name()) + "</h2>";	505		currTestName = "<h2>" + i18n("One Sample %1 Test for %2", testName, columns[0]->name()) + "</h2>";
509		506
510	printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue");	507		printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue");
511	printLine(4, i18n("%1 Value is %2", testName, round(statisticValue)), "green");	508		printLine(4, i18n("%1 Value is %2", testName, round(statisticValue[0])), "green");
512	printLine(5, i18n("P Value is %1", pValue), "green");	509		printLine(5, i18n("P Value is %1", pValue[0]), "green");
513		510
514	if (pValue <= significanceLevel)	511		if (pValue[0] <= significanceLevel)
515	printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));	512		printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));
516	else	513		else
517	printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));	514		printTooltip(5, i18n("There is a plausibility for Null Hypothesis to be true"));
Context not available.
526	// w stands for within groups	523		// w stands for within groups
527	// np is number of partition i.e., number of classes	524		// np is number of partition i.e., number of classes
528	void HypothesisTestPrivate::performOneWayAnova() {	525		void HypothesisTestPrivate::performOneWayAnova() {
529	clearTestView();	526		int np, totalRows;
530	int np, totalRows;
531	countPartitions(columns[0], np, totalRows);	527		countPartitions(columns[0], np, totalRows);
532		528
533	int* ni = new int[np];	529		int* ni = new int[np];
Context not available.
551	double sW = 0; // sum of squares of (value - mean of group) within the groups	547		double sW = 0; // sum of squares of (value - mean of group) within the groups
552	int fW = 0; // degree of freedom within the group	548		int fW = 0; // degree of freedom within the group
553	double msW = 0; // mean sum of squares within the groups	549		double msW = 0; // mean sum of squares within the groups
554	double fValue = 0;
555
556		550
557	// now finding mean of each group;	551		// now finding mean of each group;
558		552
Context not available.
573	msB = sB / fB;	567		msB = sB / fB;
574		568
575	msW = sW / fW;	569		msW = sW / fW;
576	fValue = msB / msW;	570		statisticValue.append(msB / msW);
577		571
578		572
579	pValue = nsl_stats_fdist_p(fValue, static_cast<size_t>(np-1), fW);	573		pValue.append(nsl_stats_fdist_p(statisticValue[0], static_cast<size_t>(np-1), fW));
580		574
581	QMapIterator<QString, int> i(classnameToIndex);	575		QMapIterator<QString, int> i(classnameToIndex);
582	while (i.hasNext()) {	576		while (i.hasNext()) {
Context not available.
643	delete[] std;	637		delete[] std;
644	delete[] colNames;	638		delete[] colNames;
645		639
646	printLine(1, i18n("F Value is %1", round(fValue)), "green");	640		printLine(1, i18n("F Value is %1", round(statisticValue[0])), "green");
647	printLine(2, i18n("P Value is %1 ", pValue), "green");	641		printLine(2, i18n("P Value is %1 ", pValue[0]), "green");
648		642
649	if (pValue <= significanceLevel)	643		if (pValue[0] <= significanceLevel)
650	printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));	644		printTooltip(2, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));
651	else	645		else
652	printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true"));	646		printTooltip(2, i18n("There is a plausibility for Null Hypothesis to be true"));
Context not available.
659	// all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf	653		// all formulas and symbols are taken from: http://statweb.stanford.edu/~susan/courses/s141/exanova.pdf
660		654
661	//TODO: suppress the warning "variable length arrays are a C99 feature".	655		//TODO: suppress the warning "variable length arrays are a C99 feature".
662	//TODO: changed int mean to double mean;	656		//TODO: add assumptions verification option
		657		//TODO: add tail option (if needed)
663	void HypothesisTestPrivate::performTwoWayAnova() {	658		void HypothesisTestPrivate::performTwoWayAnova() {
664	clearTestView();
665	int np_a, totalRows_a;	659		int np_a, totalRows_a;
666	int np_b, totalRows_b;	660		int np_b, totalRows_b;
667	countPartitions(columns[0], np_a, totalRows_a);	661		countPartitions(columns[0], np_a, totalRows_a);
Context not available.
720	groupMean[i][j] /= replicates[i][j];	714		groupMean[i][j] /= replicates[i][j];
721	}	715		}
722		716
723	for (int i = 0; i < np_a; i++)	717		// for (int i = 0; i < np_a; i++)
724	for (int j = 0; j < np_b; j++)	718		// for (int j = 0; j < np_b; j++)
725	groupMean[i][j] = int(groupMean[i][j]);	719		// groupMean[i][j] = int(groupMean[i][j]);
726		720
727	double ss_within = 0;	721		double ss_within = 0;
728	for (int i = 0; i < totalRows_a; i++) {	722		for (int i = 0; i < totalRows_a; i++) {
Context not available.
749	for (int i = 0; i < np_a; i++)	743		for (int i = 0; i < np_a; i++)
750	mean += mean_a[i] / np_a;	744		mean += mean_a[i] / np_a;
751		745
752	QDEBUG("ss_within is " << ss_within);
753	QDEBUG("df_within is " << df_within);
754	QDEBUG("ms_within is " << ms_within);
755		746
		747		double ss_a = 0;
			sgerlach [Unsubmitted, Done]: You are doing a lot of basic statistical calculations. Can you check whether any GSL functions can be used (see https://www.gnu.org/software/gsl/doc/html/statistics.html)?
			devanshuagarwal (Author) [Unsubmitted, Done]: The reason I am doing so many basic calculations by hand is to increase performance. If I use library functions, I have to call separate functions for the mean, the standard deviation and the other statistical calculations. Asymptotically that is still O(n), but in practice it will be slower: here I traverse the whole data only twice, whereas with library functions it would be traversed f times (where f >= number of statistical calculations).
			sgerlach [Unsubmitted, Done]: I'm not sure I understand your argument. You don't have to calculate anything f times if you save the values. Also, performance should not be a problem. I would really recommend using GSL functions to improve readability and maintainability, provided of course that it fits the workflow. Just think about it.
			devanshuagarwal (Author) [Unsubmitted, Done]: Yes, you are right, it will improve readability and maintainability. I will use GSL functions.
756	for (int i = 0; i < np_a; i++)	748		for (int i = 0; i < np_a; i++)
757	QDEBUG("mean_a is " << mean_a[i]);	749		ss_a += gsl_pow_2(mean_a[i] - mean);
		750		ss_a *= replicate * np_b;
		751
		752		int df_a = np_a - 1;
		753		double ms_a = ss_a / df_a;
		754
		755		double ss_b = 0;
758	for (int i = 0; i < np_b; i++)	756		for (int i = 0; i < np_b; i++)
759	QDEBUG("mean_b is " << mean_b[i]);	757		ss_b += gsl_pow_2(mean_b[i] - mean);
		758		ss_b *= replicate * np_a;
		759
		760		int df_b = np_b - 1;
		761		double ms_b = ss_b / df_b;
		762
		763		double ss_interaction = 0;
		764
		765		for (int i = 0; i < np_a; i++)
		766		for (int j = 0; j < np_b; j++)
		767		ss_interaction += gsl_pow_2(groupMean[i][j] - mean_a[i] - mean_b[j] + mean);
760		768
		769		ss_interaction *= replicate;
		770		int df_interaction = (np_a - 1) * (np_b - 1);
		771		double ms_interaction = ss_interaction / df_interaction;
761		772
762	QString partitionNames_a[np_a];	773		QString partitionNames_a[np_a];
763	QString partitionNames_b[np_b];	774		QString partitionNames_b[np_b];
Context not available.
791	for (int i = 0; i < np_a; i++) {	802		for (int i = 0; i < np_a; i++) {
792	rowMajor.append(new Cell(partitionNames_a[i], level, true));	803		rowMajor.append(new Cell(partitionNames_a[i], level, true));
793	for (int j = 0; j < np_b; j++) {	804		for (int j = 0; j < np_b; j++) {
794	rowMajor.append(new Cell(groupMean[i][j], level));	805		rowMajor.append(new Cell(round(groupMean[i][j]), level));
795	rowMajor.append(new Cell(replicates[i][j], level));	806		rowMajor.append(new Cell(replicates[i][j], level));
796	}	807		}
797	rowMajor.append(new Cell(mean_a[i], level));	808		rowMajor.append(new Cell(round(mean_a[i]), level));
798	level++;	809		level++;
799	}	810		}
800		811
801	rowMajor.append(new Cell("Mean", level, true));	812		rowMajor.append(new Cell("Mean", level, true));
802	for (int i = 0; i < np_b; i++)	813		for (int i = 0; i < np_b; i++)
803	rowMajor.append(new Cell(mean_b[i], level, false, 1, 2));	814		rowMajor.append(new Cell(round(mean_b[i]), level, false, 1, 2));
804	rowMajor.append(new Cell(mean, level));	815		rowMajor.append(new Cell(round(mean), level));
805		816
806	statsTable = "<h3>" + i18n("Contingency Table") + "</h3>";	817		statsTable = "<h3>" + i18n("Contingency Table") + "</h3>";
807	statsTable += getHtmlTable3(rowMajor);	818		statsTable += getHtmlTable3(rowMajor);
808		819
809	// QDEBUG("");	820		statsTable += "</br>";
810	// QDEBUG("");	821		statsTable += "<h3>" + i18n("results table") + "</h3>";
811	// QDEBUG(statsTable);	822
		823		rowMajor.clear();
		824		level = 0;
		825		rowMajor.append(new Cell("", level, true));
		826		rowMajor.append(new Cell("SS", level, true));
		827		rowMajor.append(new Cell("DF", level, true));
		828		rowMajor.append(new Cell("MS", level, true));
		829
		830		level++;
		831		rowMajor.append(new Cell(columns[0]->name(), level, true));
		832		rowMajor.append(new Cell(round(ss_a), level));
		833		rowMajor.append(new Cell(df_a, level));
		834		rowMajor.append(new Cell(round(ms_a), level));
		835
		836		level++;
		837		rowMajor.append(new Cell(columns[1]->name(), level, true));
		838		rowMajor.append(new Cell(round(ss_b), level));
		839		rowMajor.append(new Cell(df_b, level));
		840		rowMajor.append(new Cell(round(ms_b), level));
		841
		842		level++;
		843		rowMajor.append(new Cell("Interaction", level, true));
			sgerlach [Unsubmitted, Done]: Translation?
			devanshuagarwal (Author) [Unsubmitted, Done]: The translation is done automatically in the getHtmlTable3 function.
			sgerlach [Unsubmitted, Done]: OK.
		844		rowMajor.append(new Cell(round(ss_interaction), level));
		845		rowMajor.append(new Cell(df_interaction, level));
		846		rowMajor.append(new Cell(round(ms_interaction), level));
		847
		848		level++;
		849		rowMajor.append(new Cell("Within", level, true));
			sgerlach [Unsubmitted, Done]: Translation?
			devanshuagarwal (Author) [Unsubmitted, Done]: It is handled in the getHtmlTable3 method.
			sgerlach [Unsubmitted, Done]: OK.
		850		rowMajor.append(new Cell(round(ss_within), level));
		851		rowMajor.append(new Cell(df_within, level));
		852		rowMajor.append(new Cell(round(ms_within), level));
		853
		854		statsTable += getHtmlTable3(rowMajor);
		855
		856		double fValue_a = ms_a / ms_within;
		857		double fValue_b = ms_b / ms_within;
		858		double fValue_interaction = ms_interaction / ms_within;
		859
		860		double pValue_a = nsl_stats_fdist_p(fValue_a, static_cast<size_t>(np_a - 1), df_a);
		861		double pValue_b = nsl_stats_fdist_p(fValue_b, static_cast<size_t>(np_b - 1), df_b);
		862
		863		printLine(0, "F(df<sub>" + columns[0]->name() + "</sub>, df<sub>within</sub>) is " + round(fValue_a), "blue");
		864		printLine(1, "F(df<sub>" + columns[1]->name() + "</sub>, df<sub>within</sub>) is " + round(fValue_b), "blue");
		865		printLine(2, "F(df<sub>interaction</sub>, df<sub>within</sub>) is " + round(fValue_interaction), "blue");
		866
		867		printLine(4, "P(df<sub>" + columns[0]->name() + "</sub>, df<sub>within</sub>) is " + round(pValue_a), "blue");
		868		printLine(5, "P(df<sub>" + columns[1]->name() + "</sub>, df<sub>within</sub>) is " + round(pValue_b), "blue");
		869		// printLine(2, "P(df<sub>interaction</sub>, df<sub>within</sub>) is " + round(fValue_interaction), "blue");
		870
		871		statisticValue.append(fValue_a);
		872		statisticValue.append(fValue_b);
		873		statisticValue.append(fValue_interaction);
		874
		875		pValue.append(pValue_a);
		876		pValue.append(pValue_b);
		877
812	return;	878		return;
813	}	879		}
814		880
Context not available.
825	// ziBarBar = mean for all zij	891		// ziBarBar = mean for all zij
826	// ni = number of elements in group i	892		// ni = number of elements in group i
827	void HypothesisTestPrivate::performLeveneTest(bool categoricalVariable) {	893		void HypothesisTestPrivate::performLeveneTest(bool categoricalVariable) {
828	clearTestView();
829
830	if (columns.size() != 2) {	894		if (columns.size() != 2) {
831	printError("Inappropriate number of columns selected");	895		printError("Inappropriate number of columns selected");
832	return;	896		return;
Context not available.
1045	delete[] ziBar;	1109		delete[] ziBar;
1046	delete[] ni;	1110		delete[] ni;
1047		1111
1048	pValue = nsl_stats_fdist_p(fValue, static_cast<size_t>(np-1), df);	1112		pValue.append(nsl_stats_fdist_p(fValue, static_cast<size_t>(np-1), df));
1049		1113
1050	printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue");	1114		printLine(0, "Null Hypothesis: Variance is equal between all classes", "blue");
1051	printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue");	1115		printLine(1, "Alternate Hypothesis: Variance is not equal in at-least one pair of classes", "blue");
1052	printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue");	1116		printLine(2, i18n("Significance level is %1", round(significanceLevel)), "blue");
1053	printLine(4, i18n("F Value is %1 ", round(fValue)), "green");	1117		printLine(4, i18n("F Value is %1 ", round(fValue)), "green");
1054	printLine(5, i18n("P Value is %1 ", pValue), "green");	1118		printLine(5, i18n("P Value is %1 ", pValue[0]), "green");
1055	printLine(6, i18n("Degree of Freedom is %1", df), "green");	1119		printLine(6, i18n("Degree of Freedom is %1", df), "green");
1056		1120
1057	if (pValue <= significanceLevel) {	1121		if (pValue[0] <= significanceLevel) {
1058	printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));	1122		printTooltip(5, i18n("We can safely reject Null Hypothesis for significance level %1", significanceLevel));
1059	printLine(8, "Requirement for homogeneity is not met", "red");	1123		printLine(8, "Requirement for homogeneity is not met", "red");
1060	} else {	1124		} else {
Context not available.
1062	printLine(8, "Requirement for homogeneity is met", "green");	1126		printLine(8, "Requirement for homogeneity is met", "green");
1063	}	1127		}
1064		1128
		1129		statisticValue.append(fValue);
1065	return;	1130		return;
1066	}	1131		}
1067		1132
Context not available.
1247		1312
1248	//TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol;	1313		//TODO change ("⋖") symbol to ("<"), currently macro UTF8_QSTRING is not working properly if used "<" symbol;
1249	// TODO: check for correctness between: for TestZ with TailTwo	1314		// TODO: check for correctness between: for TestZ with TailTwo
1250	// pValue = 2*gsl_cdf_tdist_P(value, df) v/s	1315		// pValue.append(2*gsl_cdf_tdist_P(value, df) v/s
1251	// pValue = gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df);	1316		// pValue.append(gsl_cdf_tdis_P(value, df) + gsl_cdf_tdis_P(-value, df);
1252	double HypothesisTestPrivate::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) {	1317		double HypothesisTestPrivate::getPValue(const HypothesisTest::Test::Type& test, double& value, const QString& col1Name, const QString& col2Name, const double mean, const double sp, const int df) {
1253		1318
1254	switch (test) {	1319		switch (test) {
1255	case HypothesisTest::Test::Type::TTest: {	1320		case HypothesisTest::Test::Type::TTest: {
1256	switch (tailType) {	1321		switch (tailType) {
1257	case HypothesisTest::Test::Tail::Negative: {	1322		case HypothesisTest::Test::Tail::Negative: {
1258	pValue = gsl_cdf_tdist_P(value, df);	1323		pValue.append(gsl_cdf_tdist_P(value, df));
1259	printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue");	1324		printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≥"), col2Name), "blue");
1260	printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue");	1325		printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue");
1261	break;	1326		break;
1262	}	1327		}
1263	case HypothesisTest::Test::Tail::Positive: {	1328		case HypothesisTest::Test::Tail::Positive: {
1264	value *= -1;	1329		value *= -1;
1265	pValue = gsl_cdf_tdist_P(value, df);	1330		pValue.append(gsl_cdf_tdist_P(value, df));
1266	printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue");	1331		printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≤"), col2Name), "blue");
1267	printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue");	1332		printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING(">"), col2Name), "blue");
1268	break;	1333		break;
1269	}	1334		}
1270	case HypothesisTest::Test::Tail::Two: {	1335		case HypothesisTest::Test::Tail::Two: {
1271	pValue = 2.*gsl_cdf_tdist_P(-fabs(value), df);	1336		pValue.append(2.*gsl_cdf_tdist_P(-fabs(value), df));
1272		1337
1273	printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue");	1338		printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("="), col2Name), "blue");
1274	printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue");	1339		printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3", col1Name, UTF8_QSTRING("≠"), col2Name), "blue");
Context not available.
1280	case HypothesisTest::Test::Type::ZTest: {	1345		case HypothesisTest::Test::Type::ZTest: {
1281	switch (tailType) {	1346		switch (tailType) {
1282	case HypothesisTest::Test::Tail::Negative: {	1347		case HypothesisTest::Test::Tail::Negative: {
1283	pValue = gsl_cdf_gaussian_P(value - mean, sp);	1348		pValue.append(gsl_cdf_gaussian_P(value - mean, sp));
1284	printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue");	1349		printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≥"), col2Name), "blue");
1285	printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue");	1350		printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("⋖"), col2Name), "blue");
1286	break;	1351		break;
1287	}	1352		}
1288	case HypothesisTest::Test::Tail::Positive: {	1353		case HypothesisTest::Test::Tail::Positive: {
1289	value *= -1;	1354		value *= -1;
1290	pValue = nsl_stats_tdist_p(value - mean, sp);	1355		pValue.append(nsl_stats_tdist_p(value - mean, sp));
1291	printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue");	1356		printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≤"), col2Name), "blue");
1292	printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue");	1357		printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING(">"), col2Name), "blue");
1293	break;	1358		break;
1294	}	1359		}
1295	case HypothesisTest::Test::Tail::Two: {	1360		case HypothesisTest::Test::Tail::Two: {
1296	pValue = 2.*gsl_cdf_gaussian_P(value - mean, sp);	1361		pValue.append(2.*gsl_cdf_gaussian_P(value - mean, sp));
1297	printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue");	1362		printLine(0, i18n("Null Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("="), col2Name), "blue");
1298	printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue");	1363		printLine(1, i18n("Alternate Hypothesis: Population mean of %1 %2 Population mean of %3 ", col1Name, UTF8_QSTRING("≠"), col2Name), "blue");
1299	break;	1364		break;
Context not available.
1306	break;	1371		break;
1307	}	1372		}
1308		1373
1309	if (pValue > 1)	1374		if (pValue[0] > 1)
1310	return 1;	1375		return 1;
			sgerlachUnsubmitted Done check indentation sgerlach: check indentation
			devanshuagarwalAuthorUnsubmitted Done It is indented in the source file. I don't know why it is not shown in the difference file. devanshuagarwal: It is indented in the source file. I don't know why it is not shown in the difference file.
			sgerlachUnsubmitted Done ok. This seems to happen sometimes. sgerlach: ok. This seems to happen sometimes.
1311	return pValue;	1376		return pValue[0];
1312	}	1377		}
1313		1378
1314	int HypothesisTestPrivate::setSpanValues(HypothesisTestPrivate::Node* root, int& totalLevels) {	1379		int HypothesisTestPrivate::setSpanValues(HypothesisTestPrivate::Node* root, int& totalLevels) {
Context not available.
1473	".tg {border-collapse:collapse;border: 1px solid black;}"	1538		".tg {border-collapse:collapse;border: 1px solid black;}"
1474	".tg td{font-family:Arial, sans-serif;font-size:14px;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#fff;}"	1539		".tg td{font-family:Arial, sans-serif;font-size:14px;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#fff;}"
1475	".tg th{font-family:Arial, sans-serif;font-size:14px;font-weight:normal;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#f0f0f0;}"	1540		".tg th{font-family:Arial, sans-serif;font-size:14px;font-weight:normal;padding:10px 5px;border: 1px solid black;overflow:hidden;word-break:normal;color:#333;background-color:#f0f0f0;}"
1476	"</style><table class=tg>";	1541		"</style>";
		1542
		1543		table += "<table class=tg>";
1477		1544
1478	table += " <tr>";	1545		table += " <tr>";
1479	int prevLevel = 0;	1546		int prevLevel = 0;
Context not available.
1501	cellEndTag;	1568		cellEndTag;
1502	}	1569		}
1503	table += " <tr>";	1570		table += " <tr>";
		1571		table += "</table>";
1504	return table;	1572		return table;
1505	}	1573		}
1506		1574
Context not available.
1527	printLine(0, errorMsg, "red");	1595		printLine(0, errorMsg, "red");
1528	}	1596		}
1529		1597
1530	void HypothesisTestPrivate::clearSummaryLayout() {
1531	for (int i = 0; i < 10; i++)
1532	resultLine[i]->clear();
1533	}
1534
1535	void HypothesisTestPrivate::clearTestView() {
1536	statsTable = "";
1537	clearSummaryLayout();
1538	}
1539
1540		1598
1541	/**********************************************************************************	1599		/**********************************************************************************
1542	* virtual functions implementations	1600		* virtual functions implementations
Context not available.