diff --git a/data/datasets/StatSci_Datasets.json b/data/datasets/StatSci_Datasets.json index af4d0a9fa..90a99abe0 100644 --- a/data/datasets/StatSci_Datasets.json +++ b/data/datasets/StatSci_Datasets.json @@ -1,2455 +1,3665 @@ { "categories": [ { "category_name": "Medicine", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "West of Tokyo lies a large alluvial plain, dotted by a network of farming villages. Matui (1968) analysed the position of the 911 houses making up one of those villages. The area studied was a rectangle, 3 km by 4 km. A grid was superimposed over a map of the village, dividing its 12 square kilometres into 1200 plots, each 100 metres on a side. The numbers of houses on each of those plots are recorded in a 30 by 40 matrix of data.", "download": "http://www.statsci.org/data/general/matui.txt", "filename": "matui", "name": "Position of Houses in a Japanese Farming Village", "number_format": 31, "remove_quotes": true, "separator": "SPACE", "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": false }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Larsen and Marx (1986) write \nSince Word War II, plutonium for use in atomic weapons has been produced at an Atomic Energy Commission facility in Hanford, Washington. One of the major safety problems encountered there has been the storage of radioactive wastes. Over the years, significant quantities of these substances - including strontium 90 and cesium 137 - have leaked from their open-pit storage areas into the nearby Columbia River, which flows along the Washington-Oregon border, and eventually empties into the Pacific Ocean. 
\nTo measure the health consequences of this contamination, an index of exposure was calculated for each of the nine Oregon counties having frontage on either the Columbia River or the Pacific Ocean. This particular index was based on several factors, including the county's stream distance from Hanford and the average distance of its population from any water frontage. As a covariate, the cancer mortality rate was determined for each of these same counties. \nThe data give the index of exposure and the cancer mortality rate during 1959-1964 for the nine Oregon counties affected. Higher index values represent higher levels of contamination. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCounty\n\nName of county\n\nExposure\n\nIndex of exposure\n\nMortality\n\nCancer mortality per 100,000 man-years\n\n\n\n", "download": "http://www.statsci.org/data/general/hanford.txt", "filename": "hanford", "name": "Cancer Mortality near Hanford Reactor", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data show the incidence of nonmelanoma skin cancer among women in Minneapolis-St Paul, Minnesota, and Dallas-Fort Worth, Texas. The towns are coded 0 for St Paul and 1 for Fort Worth. 
\nOne would expect sun exposure to be greater in Texas than in Minnesota.", "download": "http://www.statsci.org/data/general/skin.txt", "filename": "skin", "name": "Skin Cancer in Texas and Minnesota", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data comes from an experiment to measure the mortality of cancer cells under radiation undertaken in the Department of Radiology, University of Cape Town. Four hundred cells were placed on a dish, and three dishes were irradiated at a time, or occasion. After the cells were irradiated, the surviving cells were counted. Since cells would also die naturally, dishes with cells were put into the radiation chamber without being irradiated, to establish the natural mortality. This data gives only these zero-dose data. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nOccasion\n\nIrradiation occasion (1-27)\n\nSurvived\n\nNumber of cells surviving out of 400 placed on dish\n", "download": "http://www.statsci.org/data/general/radiatio.txt", "filename": "radiatio", "name": "Mortality of Cancer Cells", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Oncology" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives the time between 800 successive pulses along a nerve fibre. There are 799 observations rounded to the nearest half in units of 1/50 second. 
", "download": "http://www.statsci.org/data/general/nerve.txt", "filename": "nerve", "name": "Time between Nerve Pulses", "number_format": 31, "remove_quotes": true, "separator": "auto", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A group of female college students took a test that measured their verbal IQs and also underwent an MRI scan to measure the size of their brains (in 1000s of pixels)", + "download": "https://dasl.datadescription.com/download/data/3084", + "filename": "Brain-size", + "name": "Brain size", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "An experiment was performed to see whether sensory deprivation over an extended period of time has any effect on the alpha-wave patterns produced by the brain. To determine this, 20 subjects, inmates in a Canadian prison, were randomly split into two groups. Members of one group were placed in solitary confinement. Those in the other […] ", + "download": "https://dasl.datadescription.com/download/data/3085", + "filename": "Brain-waves", + "name": "Brain waves", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Neurology" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Students in an introductory statistics class (MS212 taught by Professor John Eccleston and Dr Richard Wilson at The University of Queensland) participated in a simple experiment. 
The students took their own pulse rate. They were then asked to flip a coin. If the coin came up heads, they were to run in place for one minute. Otherwise they sat for one minute. Then everyone took their pulse again. The pulse rates and other physiological and lifestyle data are given in the data. \nFive class groups between 1993 and 1998 participated in the experiment. The lecturer, Richard Wilson, was concerned that some students would choose the less strenuous option of sitting rather than running even if their coin came up heads, so in the years 1995-1998 a different method of random assignment was used. In these years, data forms were handed out to the class before the experiment. The forms were pre-assigned to either running or non-running and there were an equal number of each. In 1995 and 1998 not all of the forms were returned so the numbers running and sitting was still not entirely controlled.", "download": "http://www.statsci.org/data/oz/ms212.txt", "filename": "ms212", "name": "Pulse Rates before and after Exercise", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A study was conducted at a major north eastern American medical centre regarding blood cholesterol levels and heart-attack incidents. A total of 28 heart-attack patients had their cholesterol levels measured two days, 4 days, and 14 days after the attack. In addition, cholesterol levels were recorded for a control group of 30 people who had not had a heart attack. 
The units of cholesterol measurement are not given in the original reference but are presumably mg/dL of blood.", "download": "http://www.statsci.org/data/general/cholestg.txt", "filename": "cholestg", "name": "Cholesterol Levels after Heart Attack", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "This was a pilot study for the experiment described in Recovery of Patients from Stroke. The purpose of the study was to compare four evaluation tools for assessing the recovery of patients who had recently suffered a stroke. The four tools were (1) the Goteburg Assessment Form of Hemiplegia, (2) the Bobath Assessment Form, (3) the Barthel Index and (4) the Kenny Scoring System. The Goteburg Assessment was divided into seven components measuring motor function and balance, some sensation qualities, passive range of motion and occurrence of joint pain. The Bobath form evaluates three areas of motor performance, postural reactions, voluntary movement, and balance and automatic protective reactions. The Barthel index and the Kenny Scoring system evaluate ability to carry out activities of daily living such as dressing, feeding, toileting etc. \nTwenty subjects were selected from two large public hospitals in Brisbane. All subjects had recently suffered a cerebrovascular accident resulting in hemiplegia lasting at least 24 hours, had not previously been incapacitated from stroke or other disease and were currently receiving occupational therapy. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject ID (1-20)\n\nSex\n\nMale (M) or female (F)\n\nSide\n\nSide of brain affected, left (L) or right (R)\n\nAge\n\nAge of subject in years\n\nLapse\n\nTime since occurrence of stroke in weeks\n\nArms\n\nArm and shoulder motor function (max 36)\n\nLegs\n\nLower limb motor function (max 30)\n\nHands\n\nWrist and hand motor function (max 24)\n\nBalance\n\nBalance score (max 14)\n\nSensation\n\nSensation score (max 24)\n\nJointPain\n\nFreedom from joint pain (max 24)\n\nJointMotion\n\nPassive joint motion (max 24)\n\nBobath\n\nTotal of Bobath Assessment Form (max 266)\n\nBarthel\n\nBarthel Index (max 100)\n\nKenny\n\nKenny scoring system of daily living (max 24)\n\n\n\n\nThe researcher chose the Barthel Index and the first five components of the Goteburg Evaluation for use in the later experiment.", "download": "http://www.statsci.org/data/oz/strokeass.txt", "filename": "strokeass", "name": "Evaluation Tools for Stroke Rehabilitation", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "This study compared three occupational therapy programs designed to help patients recover from the effects of a stroke. Eight stroke patients were assigned to each of the three treatment groups. The first group (E) was given an experimental program developed by the investigator from a model of intervention for stroke rehabilitation. The second group (F) was given a pre-existing program. The third group (G) was a non-treatment program. Each program lasted for 8 weeks. All subjects were evaluated at the start of the program and at weekly intervals until the end of the program. \nGroup E and F patients were treated in the Occupational Therapy Department of a large Brisbane repatriation hospital. 
Group G patients were located in the wards of a large State Hospital in Brisbane. \nThe recovery status of each subject at each time was evaluated using the Goteburg Evaluation of Hemiplegia and the Barthel Index. The Goteburg evalation form gave separate scores for three motor function variables (upper limbs, hand and wrist, lower limbs) and for balance while the Barthel Index gave a single overall score. Higher scores indicate better functional ability. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject ID\n\nGroup\n\nExperimental (E), pre-existing (F) or non-treatment (G)\n\nSex\n\nMale (M) or female (F)\n\nSide\n\nSide of brain affected, left (L) or right (R)\n\nAge\n\nAge of subject in years\n\nLapse\n\nTime lapse from stroke to start of program in weeks\n\nUE1\n\nUpper extremities score (out of 36) at week 1\n\nUE2\n\n... week 2\n\nUE3\n\n... week 3\n\nUE4\n\n... week 4\n\nUE5\n\n... week 5\n\nUE6\n\n... week 6\n\nUE7\n\n... week 7\n\nUE8\n\n... week 8\n\nHW1\n\nHand-wrist score (out of 24) at week 1\n\nHW2\n\n... week 2\n\nHW3\n\n... week 3\n\nHW4\n\n... week 4\n\nHW5\n\n... week 5\n\nHW6\n\n... week 6\n\nHW7\n\n... week 7\n\nHW8\n\n... week 8\n\nLE1\n\nLower extremities score (out of 30) at week 1\n\nLE2\n\n... week 2\n\nLE3\n\n... week 3\n\nLE4\n\n... week 4\n\nLE5\n\n... week 5\n\nLE6\n\n... week 6\n\nLE7\n\n... week 7\n\nLE8\n\n... week 8\n\nBal1\n\nBalance score (out of 14) at week 1\n\nBal2\n\n... week 2\n\nBal3\n\n... week 3\n\nBal4\n\n... week 4\n\nBal5\n\n... week 5\n\nBal6\n\n... week 6\n\nBal7\n\n... week 7\n\nBal8\n\n... week 8\n\nBart1\n\nBarthel Index score (out of 100) at week 1\n\nBart2\n\n... week 2\n\nBart3\n\n... week 3\n\nBart4\n\n... week 4\n\nBart5\n\n... week 5\n\nBart6\n\n... week 6\n\nBart7\n\n... week 7\n\nBart8\n\n... 
week 8\n", "download": "http://www.statsci.org/data/oz/stroke.txt", "filename": "stroke", "name": "Recovery of Patients from Stroke", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Cardiology" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "How difficult is it to maintain your balance while concentrating? Is it more difficult when you are older? Nine elderly (6 men and 3 women) and eight young men were subjects in an experiment. Each subject stood barefoot on a \"force platform\" and was asked to maintain a stable upright position and to react as quickly as possible to an unpredictable noise by pressing a hand held button. The noise came randomly and the subject concentrated on reacting as quickly as possible. The platform automatically measured how much each subject swayed in millimetres in both the forward/backward and the side-to-side directions.", "download": "http://www.statsci.org/data/general/balaconc.txt", "filename": "balaconc", "name": "Maintaining Balance while Concentrating", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data comes from a small study in Western Australia of hypertension, alcohol, and obesity. This study was partly designed to mimic a previously reported U.S. study based on a larger sample. A log-linear interaction model is a convenient and effective way of investigating associations among the three variables. A prior-posterior analysis of this 3 x 2 x 4 contingency table using prior information from the previous study (Klatsky et al., 1977) may be appropriate. 
The previous study reported the general conclusion that alcohol intake and obesity were significantly and independently associated with hypertension (blood pressure). Although a few summary statistics were reported, the full data were not published. One difference between the two studies was in the definition of obesity categories.\nThe data is listed as follows: the first column (Obesity) contains a numerical value representing the level of obesity (1=low, 2=average, 3=high), the second column (BP) contains a numerical indicator of the presence of hypertension (0=no, 1=yes). The next five columns are labelled with the levels of alcoholic intake of the subjects, in drinks per day. These columns contain the frequency of observations that have this level of intake, for each group of obesity level and hypertension presence.", "download": "http://www.statsci.org/data/oz/alchyp.txt", "filename": "alchyp", "name": "Alcohol, Hypertension and Obesity", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "FEV (forced expiratory volume) is an index of pulmonary function that measures the volume of air expelled after one second of constant effort. The data contains determinations of FEV on 654 children ages 6-22 who were seen in the Childhood Respiratory Disease Study in 1980 in East Boston, Massachusetts. The data are part of a larger study to follow the change in pulmonary function over time in children. 
\nID\n - \nID number\nAge\n - \nyears\nFEV\n - \nlitres\nHeight\n - \ninches\nSex\n - \nMale or Female\nSmoker\n - \nNon = nonsmoker, Current = current smoker\n", "download": "http://www.statsci.org/data/general/fev.txt", "filename": "fev", "name": "Childhood Respiratory Disease", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the results of a study aimed at reducing the risk of HIV infection among African-American adolescents. The subjects were 14-18 year old female and male adolescents in a Southern USA city. The study compared two interventions. The treatment intervention was an 8-week Behavioural Skills Training (BST) program. The control was a single 2-hour education session about HIV and AIDS. The subjects completed sexual attitude and activity questionnaires before and after the intervention and at 6-month and 12-month follow-ups. The data here are for 10 subjects for each intervention although the original study was much larger. The data given here appear to have been created by Howell (1999) based on summary statistics from the original study. The dependent variable is the logarithm-transformed frequency of condom-protected sex ( log(Y+1) ). 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nBST\n\n1 = BST intervention, 0 = control\n\nPre\n\nLog-frequency of protected sex before the intervention\n\nPost\n\nLog-frequency of protected sex after the intervention\n\nFU6\n\nLog-frequency of protected sex reported at the 6 months follow-up\n\nFU12\n\nLog-frequency of protected sex reported at the 12 months follow-up\n", "download": "http://www.statsci.org/data/general/protsex.txt", "filename": "protsex", "name": "Behavioural Skills Training and Protected Sex", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "HARVEST (Hypertension and Ambulatory Recording Venetia Study) is a trial designed to assess whether ambulatory monitoring adds something to office (clinical) blood pressure in predicting the development of fixed hypertension and of cardiovascular complications in patients with borderline to mild hypertension. Ambulatory monitoring refers to the measuring of home blood pressure by an annotated device that the subject wears for 24 hours. The data give information on 1100 subjects compiled by Dr Paolo Palatini, Professor of Clinical Medicine at the University of Padua, Italy. \nPatients were eligible for the study if they satisfied the following criteria: \ndiastolic blood pressure (BP) between 90 and 100 mm Hg or isolated systolic hypertension (systolic BP greater than or equal to 140 mm Hg and diastolic BP less than 90 mm Hg) \nnever been treated for hypertension \naged 18 to 45 years old \nfree from other important risk factors for atherosclerosis\nThe subjects were followed for 5 years. 
Baseline examinations, including ECG and echocardiography, were repeated at the end of the study or upon development of hypertension, defined as BP persistently 100 mm Hg or greater or a systolic BP of 160 mm Hg or greater. Ambulatory monitoring was repeated 3 months and 5 years after the baseline evaluation.\nThe symbol C or A after the name of a variable means:\nC = clinical examination; A = ambulatory (home monitoring)\nThe last symbol of a variable name may be B, 3, 5 or E:\nB = baseline examination\n3 = 3-month examination\n5 = 5-year examination\nE = endpoint examination \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSmoke\n\nSmoking status at baseline examination:\n0 = non-smoking,\n1 = 1-5 cigarettes per day,\n2 = 6-10 cigarettes per day,\n3 = 11-20 cigarettes per day.\n\nSport\n\nSport activity at baseline examination:\n0 = only sedentary,\n1 = light activity (walking),\n2 = sports non-competitive,\n3 = sports competitiv.\n\nSBP\n\nSystolic blood pressure\n\nDBP\n\nDiastolic blood pressure\n\nHR\n\nHeart rate\n\nAge\n\nAge in years\n\nBMI\n\nBody mass index: 100 * weight (kg) / height (m)2\n\nEndPoint\n\nEndpoint status at the time the file was created:\n1 = blood pressure level hypertensive\n0 = blood pressure level not hypertensive\n\nTime\n\nTime in months from baseline examination to the date of endpoint or to May 30, 1999, whichever was earlier\n\nMale\n\nGender:\n1 = male\n0 = female\n", "download": "http://www.statsci.org/data/general/harvest.txt", "filename": "harvest", "name": "HARVEST Trial", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data is a subset from the Six Cities study, a longitudinal study of the health effects of air pollution. 
The data contain repeated binary measures of the wheezing status (1 = yes, 0 = no) for each of 537 children from Stuebenville, Ohio, at ages 7, 8, 9 and 10 years. Also measured is whether or not the mother was a smoker during the first year of the study.", "download": "http://www.statsci.org/data/general/wheeze.txt", "filename": "wheeze", "name": "Child's Wheeze and Mother's Smoking", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The National Trachoma and Eye Health Program (1980) reports on the prevalence of otitis media (an infection that produces pus within the middle ear) in both aboriginal and non-aboriginal communities in Australia. The Program surveyed all aboriginal communities in Australia and attempted to contact all aborigines. Simultaneously, contact was made with non-aborigines usually living in the same or adjacent locations. Because of the high prevalence of infection in the aboriginal community only severe cases were classified as infected, virtually all of them suffering bursting of the ear drum and consequent scarring. It was thought that scarring could be used to identify those people who previously had had severe infections, but were not currently infected. So it was possible to classify subjects as (a) either not currently infected and no scarring, (b) currently infected or (c) not currently infected but one or more drums scarred. The data give the number of aborigines examined in various age intervals and the proportions classified as (a), (b) or (c). 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nAge \n\nAge interval (years)\n\nExamined \n\nNumber of subjects examined\n\nNone \n\nProportion not currently infected and with no scarring\n\nCurrent \n\nProportion currently infected\n\nPast \n\nProportion not currently infected but with one or both drums scarred\n", "download": "http://www.statsci.org/data/oz/otitis.txt", "filename": "otitis", "name": "Prevalence of Otitis Media in Aboriginal Communities", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "In a study of the effect of ticks on cattle in North Queensland, the disease status of animals exposed to the tick-borne parasite Anaplasma marginale is of some concern. A symptom of infection from this parasite, the number of red blood cells can be reduced by up to 80% at the point of peak anaemia. The problem to be considered here concerns a way of quantifying the change in red blood cell populations during the recovery stages of the disease. \nIn a laboratory trial, cows were inoculated with the parasite and their red blood cells monitored before and after inoculation. The data collected were in the form of red cell volume distributions obtained from a Coulter counter, truncated and sorted into groups. In work as yet unpublished, McLaren et al. have addressed the problem of fitting distributions to similar data from humans suffering myelodysplastic anaemia, and McLaren (private communication) has suggested the need to develop hypothesis testing procedures for this type of data. \nThe observed counts of red cell volume from one of the cows on days 21 (Freq1) and 23 (Freq2) after inoculation are listed. The counts are grouped into 18 intervals of equal width of 7.2 fl. 
The first column (Group) lists the group number, the second (Vol) lists the truncated lower endpoint of the cell volume interval. The lower and upper truncation values for these red cell volume counts were 21.6 fl and 151.2 fl respectively. A cursory inspection of the two sets of observed frequency counts in histogram form on the logarithmic scale suggests that the red blood cell volume distribution is bimodal, at least at 21 days after inoculation. \n", "download": "http://www.statsci.org/data/oz/rbcmix.txt", "filename": "rbcmix", "name": "Red Blood Cell Volume Data for Cows", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Does blood pressure, on average, change with age? The data here are two categorical variables: Blood pressure categorized as High, Normal, Low, and Age categorized as under 30, 30-49, and over 50.", + "download": "https://dasl.datadescription.com/download/data/3077", + "filename": "Blood-Pressure", + "name": "Blood Pressure", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Common" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The effect of a single 600 mg dose of ascorbic acid versus a sugar placebo on the muscular endurance (as measured by repetitive grip strength trials) of fifteen male volunteers (19-23 years old) was evaluated. The study was conducted in a double-blind manner with crossover. \nThree initial maximal contractions were performed for each subject, with the greatest value indicating maximal grip strength. 
Muscular endurance was measured by having the subjects squeeze the dynamometer, hold the contraction for three seconds, and repeat continuously until a value of 50% maximum grip strength was achieved for three consecutive contractions. Endurance time was defined as the number of repetitions required to go from maximum grip strength to the initial 50% value. Subjects were given frequent positive verbal encouragement in an effort to have them complete as many repetitions as possible. ", "download": "http://www.statsci.org/data/general/vitaminc.txt", "filename": "vitaminc", "name": "Effect of Vitamin C on Muscular Endurance", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Proponents of Reiki, a type of touch therapy, hypothesize that Reiki re-establishes the energy balance in areas of the body experiencing disease and discomfort, thus promoting healing, reducing pain and increasing quality of life. The main feature that distinguishes Reiki from other touch therapies, such as therapeutic touch, is that Reiki therapists have physical contact with the body. Participants in Reiki are fully-clothed and may be covered with a blanket if they wish. The treatment, delivered to 18 specific areas of the body, begins with the participant lying on his or her back. The hands are placed on 10 distinct locations on the head and torso. The participant is then asked to lie on his or her stomach (or side, if this is more comfortable), where the hands are placed on 8 additional distinct locations covering the back, hip area and feet. The treatment takes approximately 1.25 hours to complete. \nTreatment of cancer pain usually focuses on opioids. 
Since high doses of opioids frequently aggravate other common symptoms of cancer patients, it is of interest to explore non-drug treatments that may allow control of cancer pain with lower doses of opioids. This project studied whether Reiki is beneficial in the management of pain for people from the community experiencing general chronic pain, as a prelimary step in deciding whether Reiki is worth trying for cancer patients. \nThe Sample \nThe eligibility criteria were that subjects must be at least 18 years old, not receiving chemotherapy or radiotherapy, be experiencing moderate pain (at least 3 on a VAS (0-10) or 2 on a Likert scale (0-5)), have normal cognitive function, be able to speak, read and write English, and be willing to complete the study rating scales. The sample size necessary was calculated using the binomial distribution with the assumption that 50% of the study participants might be expected to benefit from treatment. The probability of a decrease in pain following treatment in 14 or more cases out of 20 by chance alone is 0.058. [VAS means \"Visual Analogue Scale\". A Likert-type item consists of a single statement, followed by a usually five or six-point choice with each choice described in words.] \nNotices were placed in retail establishments and community centres. Potential participans identified themselves by telephoning the research assistant at a number provided on the recruitment posters. Individuals who met the eligibility criteria and who signed a consent form were scheduled to receive a treatment by a Reiki therapist. \nTwenty People were recruited (18 women and 2 men) who ranged in age from 23 to 62 years (mean 44 years). These participants were currently experiencing pain at 55 sites. Ten participants had pain in their upper body and 4 in their lower body. The remaining 6 participants had pain in both the upper and lower parts of their body. 
Eight participants attributed their pain to bone and muscle problems and 5 participants to chronic illness. Three of the participants included in the chronic illness group had cancer. Six participants had been experiencing pain for 1 year or less, and 7 had been experiencing pain for more than 1 year, up through 7 years. The remaining seven had been in pain for more than 7 years, one for 48 years. \nEighteen participants had asked their physician for help with their pain, and 19 were currently using at least 1 of the following strategies to manage it: analgesic preparations, anti-inflammatory medications, exercise, massage, acupuncture, therapeutic touch, chiropractic, homeopathy, meditation, vitamins, steam, muscle relaxation techniques and Tai Chi. \nTreatment and Data Collection \nParticipants were given 1 treatment by the Reiki therapist in her office. They lay on a massage table fully clothed and, if desired, were also covered with a sheet or blanket. The lights were dimmed, and a candle was lit; soft music played in the background. The environment was consistent through all 20 treatments. A pain VAS ranging from 0 to 10 and a Likert scale ranging from 0 to 5 were completed immediately before and after the Reiki treatment. ", "download": "http://www.statsci.org/data/general/reiki.txt", "filename": "reiki", "name": "Using Reiki to Manage Pain", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data consist of measurements (x1, x2, Age in months) on 23 babies, collected in the Faculty of Medicine at the University of Hong Kong. It would be of great medical interest to find a relationship between x1 and x2. However, any correlation between them is likely spurious because both x1 and x2 tend to increase with age. 
See Chris Lloyd's original mailing to the ANZStat mailing list discussion.", "download": "http://www.statsci.org/data/general/babies.txt", "filename": "babies", "name": "Measurements on Babies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "When anthropologists analyze human skeletal remains, an important piece of information is living stature. Since skeletons are commonly incomplete, stature estimates are commonly based on statistical methods that utilize measurements on small bones. The following data was presented in a paper in the American Journal of Physical Anthropology to validate one such method. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMetaCarp\n\nMetacarpal bone I length in cm\n\nStature\n\nStature in cm\n\n\n\n", "download": "http://www.statsci.org/data/general/stature.txt", "filename": "stature", "name": "Prediction of Height from Metacarpal Bone Length", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "CPK (creatine phosphokinase) is an enzyme contained within muscle cells which is necessary for the storage and release of energy. It can be released into the blood in response to vigorous exercise from damaged (leaky) muscle cells. This occurs often even in healthy athletes. \nThis study investigated the metabolic effect of cross-country skiing. Subjects were participants in a 24 hour cross-country relay. 
Age, weight (kg) and blood CPK concentration 12 hours into the relay were recorded.", "download": "http://www.statsci.org/data/general/bloodcpk.txt", "filename": "bloodcpk", "name": "Blood CPK in Cross-Country Skiers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Studies conducted at the University of Melbourne indicate that there may be a difference between the pain thresholds of blonds and brunettes. Men and women of various ages were divided into four categories according to hair colour: light blond, dark blond, light brunette, and dark brunette. The purpose of the experiment was to determine whether hair colour is related to the amount of pain produced by common types of mishaps and assorted types of trauma. Each person in the experiment was given a pain threshold score based on his or her performance in a pain sensitivity test (the higher the score, the higher the person’s pain tolerance). \n\nVariable\n\nValues\n\nHairColour\n\nLightBlond, DarkBlond, LightBrunette or DarkBrunette \n\nPain\n\nPain threshold score \n", "download": "http://www.statsci.org/data/oz/blonds.txt", "filename": "blonds", "name": "Pain Thresholds of Blonds and Brunettes", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "For his MS305 data project, Michael Larner measured the weight and various physical measurements for 22 male subjects aged 16 - 30. Subjects were randomly chosen volunteers, all in reasonably good health. Subjects were requested to slightly tense each muscle being measured to ensure measurement consistency. 
Apart from Mass, all measurements are in cm. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMass\n\nWeight in kg\n\nFore\n\nMaximum circumference of forearm\n\nBicep\n\nMaximum circumference of bicep\n\nChest\n\nDistance around chest directly under the armpits\n\nNeck\n\nDistance around neck, approximately halfway up\n\nWaist\n\nDistance around waist, approximately trouser line\n\nThigh\n\nCircumference of thigh, measured halfway between the knee and the top of the leg\n\nCalf\n\nMaximum circumference of calf\n\nHeight\n\nHeight from top to toe\n\nShoulders\n\nDistance around shoulders, measured around the peak of the shoulder blades\n", "download": "http://www.statsci.org/data/oz/physical.txt", "filename": "physical", "name": "Mass and Physical Measurements for Male Subjects", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Larsen and Marx (1986) write \nIn folklore, the full moon is often portrayed as something sinister, a kind of evil force possessing the power to control our behaviour. Over the centuries, many prominent writers and philosophers have shared this belief. Milton, in Paradise Lost, refers to \nDemoniac frenzy, moping melancholy\nAnd moon-struck madness. \nAnd Othello, after the murder of Desdemona, laments \nIt is the very error of the moon\nShe comes more near the earth than she was wont\nAnd makes men mad. \nOn a more scholarly level, Sir William Blackstone, the renowned eighteenth century English barrister, defined a \"lunatic\" as \none who hath ... lost the use of his reason and who hath lucid intervals, sometimes enjoying his senses and sometimes not, and that frequently depending upon changes of the moon. 
\nThe data give the admission rates to the emergency room of a Virginia mental health clinic before, during and after the 12 full moons from August 1971 to July 1972. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMonth\n\nMonth of year: Aug, Sep, ... Jul\n\nMoon\n\nBefore, During or After the full moon\n\nAdmission\n\nAdmission rate (patients/day)\n\n\n\n", "download": "http://www.statsci.org/data/general/fullmoon.txt", "filename": "fullmoon", "name": "Mental Hospital Admissions During Full Moons", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Five types of electrodes were applied to the arms of 16 subjects and the resistance measured. The experiment was designed to see whether all five electrode types performed similarly. \nAfter obtaining the results, the experimenters decided that the reason for the two large readings on subject 15 was the excessive amount of hair of those parts of the subject's arm. They concluded that this subject's data should be deleted. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nSubject number\n\nE1\n\nResistance measured by electrode type 1\n\nE2\n\nResistance measured by electrode type 2\n\nE3\n\nResistance measured by electrode type 3\n\nE4\n\nResistance measured by electrode type 4\n\nE5\n\nResistance measured by electrode type 5\n", "download": "http://www.statsci.org/data/general/resist.txt", "filename": "resist", "name": "Skin Resistance", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Osteoarthritis is a mechanical degeneration of joint surfaces causing pain, swelling and loss of joint function in one or more joints. Physiotherapists treat the affected joints to reduce pain (VAS = visual analogue scale) and to increase the range of movement (ROM). In this study there were 10 subjects, each of whom was treated with continuous TENS (electric nerve stimulation) and short wave diathermy. Measurements were taken also after no treatment. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nSubject\n\nSubject identifier\n\nNoROM\n\nROM after no treatment\n\nNoVAS\n\nVAS after no treatment\n\nTENSROM\n\nROM after continuous TENS\n\nTENSVAS\n\nVAS after continuous TENS\n\nSWDROM\n\nROM after short wave diathermy\n\nSWDVAS\n\nVAS after short wave diathermy\n", "download": "http://www.statsci.org/data/oz/oa.txt", "filename": "oa", "name": "Treatment for Osteoarthritis", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The balance of subjects were observed for two different surfaces and for restricted and unrestricted vision. 
Balance was assessed qualitatively on an ordinal 4-point scale based on observation by the experimenter. Subjects were expected to be better balanced (show less sway) when standing on the normal surface than on foam, and when their eyes were open rather than closed or when their vision was restricted by a dome. \nEqual numbers of male and female subjects were chosen. For both males and females, ten older (more than 24 years old) and ten younger subjects were selected. \nThe data is available in two formats. The first is in univariate or \"strung out form\" which is suitable for entry to Minitab or S-Plus and to most mixed model programs. The second is in repeated measures format which is suitable for SPSS and for most special purpose repeated measures programs. \nUnivariate format: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1 to 40\n\nSex\n\nmale or female\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight in cm\n\nWeight\n\nWeight in kg\n\nSurface\n\nnormal or foam\n\nVision\n\neyes open, eyes closed, or closed dome\n\nCTSIB\n\nQualitative measure of balance, 1 (stable) - 4 (unstable) \n\n\n\n", "download": "http://www.statsci.org/data/oz/ctsibuni.txt", "filename": "ctsibuni", "name": "Effect of Surface and Vision on Balance", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data relate to the transport of sulfite ions from blood cells suspended in a salt solution. The chloride concentration (%) was measured over a period of about 8 minutes as a continuous curve generated from electrical potentials. The data given here were digitized from the curve at 10 second intervals. 
\nThe theory of ion transport suggested that the concentration asymptote exponentially, i.e., \nChloride = q1{1 - q2exp(- q3Time)} \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTime\n\nElapsed time in minutes\n\nChloride\n\nChloride concentration (%)\n", "download": "http://www.statsci.org/data/general/chloride.txt", "filename": "chloride", "name": "Transport of Sulfite Ions from Blood Cells", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data are the times, in days, that heroin addicts spend in a clinic. There are two clinics and the covariates are believed to affect the times spent in the clinic by addicts. \n \nVariable\n \nDescription\n\nClinic\n\n1 or 2\n\nStatus\n\n0 = still in clinic at end of study (censored) or 1 = departed from clinic\n\nTime\n\ndays spent in clinic\n\nPrison\n\n1 = prison record or 0 = no record\n\nDose\n\nmethadone dosage (mg/day)\n", "download": "http://www.statsci.org/data/oz/heroin.txt", "filename": "heroin", "name": "Methadone Treatment of Heroin Addicts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A pharmaceutical company tested three formulations of a pain relief medicine for migraine headache sufferers. For the experiment, 27 volunteers were selected and 9 were randomly assigned to one of three drug formulations. 
The subjects were instructed to take the drug during their next migraine headache episode and to report their pain on a scale […] ", + "download": "https://dasl.datadescription.com/download/data/3053", + "filename": "Analgesics", + "name": "Analgesics", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A study compared the effectiveness of several antidepressants by examining the experiments in which they had passed the FDA requirements. Each of those experiments compared the active drug with a placebo, an inert pill given to some of the subjects. In each experiment some patients treated with the placebo had improved, a phenomenon called the […] ", + "download": "https://dasl.datadescription.com/download/data/3054", + "filename": "Antidepressants", + "name": "Antidepressants", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A student investigated just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods—washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). 
Her experiment consisted of one experimental factor, the […] ", + "download": "https://dasl.datadescription.com/download/data/3561", + "filename": "Baterial-soap", + "name": "Baterial soap", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Measurements of 250 men of various ages. The percent of a man’s body that is fat is a matter of concern for health and fitness. But the %bodyfat is difficult and expensive to measure accurately. These data offer correct %bodyfat measurements along with a variety of easier to find measures. Can you build a model ", + "download": "https://dasl.datadescription.com/download/data/30790", + "filename": "Bodyfat", + "name": "Bodyfat", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Burger King publishes full nutrition information on its menu. These data are for the foods on the menu recently. (Visit the site listed as the reference for the most current list.) ", + "download": "https://dasl.datadescription.com/download/data/3089", + "filename": "Burger-King-items", + "name": "Burger King items", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Nutritionists are concerned that people have a good breakfast. But what does that mean? 
students collected nutrition information from the nutrition labels of cereals in one supermarket. ", + "download": "https://dasl.datadescription.com/download/data/3107", + "filename": "Cereals", + "name": "Cereals", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Other" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Why do older people often seem not to remember things as well as younger people? Do they not pay attention? Do they just not process the material as thoroughly? One theory regarding memory is that verbal material is remembered as a function of the degree to which is was processed when it was initially presented. Eysenck (1974) randomly assigned 50 younger subjects and 50 older (between 55 and 65 years old) to one of five learning groups. The Counting group was asked to read through a list of words and count the number of letters in each word. This involved the lowest level of processing. The Rhyming group was asked to read each word and think of a word that rhymed with it. The Adjective group was asked to give an adjective that could reasonably be used to modify each word in the list. The Imagery group was instructed to form vivid images of each word, and this was assumed to require the deepest level of processing. None of these four groups was told they would later be asked to recall the items. Finally, the Intentional group was asked to memorize the words for later recall. After the subjects had gone through the list of 27 items three times they were asked to write down all the words they could remember. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nYounger or Older\n\nProcess\n\nThe level of processing: Counting, Rhyming, Adjective, Imagery or Intentional\n\nWords\n\nNumber of words recalled\n", "download": "http://www.statsci.org/data/general/eysenck.txt", "filename": "eysenck", "name": "Age and Memory", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Nolen-Hoeksema and Morrow (1991) had the good fortune to have measured depression among college students 2 weeks before the Loma Prieta earthquake in California in 1989. Nolen-Hoeksema and Morrow collected repeat data to track the students’ adjustments to the earthquake. Measurements were taken every 3 weeks starting 2 weeks before the earthquake to 10 weeks after. The data were recreated by Howell (1999) based on the Nolen-Hoeksema and Morrow findings. Each row gives the depression scores for one student.\n\n\nVariable\n\nDescription\n\n\n\n\n\nWeek0\n\nDepression scores 2 weeks before the earthquake\n\nWeek3\n\nDepression scores one week after the quake\n\nWeek6\n\nDepression scores 4 weeks after the quake\n\nWeek9\n\nDepression scores 7 weeks after the quake\n\nWeek12\n\nDepression scores 10 weeks after the quake\n", "download": "http://www.statsci.org/data/general/lomaprie.txt", "filename": "lomaprie", "name": "Depression Before and After an Earthquake", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": " In a random sample of U.S. 
adults surveyed in December 2011, Pew Research asked how important it is “to you personally” to be successful in a high-paying career or profession. Responses are recorded by sex and age. ", + "download": "https://dasl.datadescription.com/download/data/3071", + "filename": "Being-successful", + "name": "Being successful", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A researcher at Cornell University wanted to know how friendship might affect simple sales such as this. She randomly divided subjects into two groups and gave each group descriptions of items they might want to buy. One group was told to imagine buying from a friend whom they expected to see again. The other group […] ", + "download": "https://dasl.datadescription.com/download/data/3090", + "filename": "Buy-from-a-friend", + "name": "Buy from a friend", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Psychology" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A study that examined the health risks of smoking measured the cholesterol levels of people who had smoked for at least 25 years and people of similar ages who had smoked for no more than 5 years and then stopped", + "download": "https://dasl.datadescription.com/download/data/3111", + "filename": "Cholesterol-and-smoking", + "name": "Cholesterol and smoking", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + 
"comment_character": "#", + "create_index_column": false, + "description": "Data on 816 brands of cigarettes. What relationships are there among the nicotine content, tars, and CO? Are any brands unusually high or low in nicotine? Can you account for that? ", + "download": "https://dasl.datadescription.com/download/data/3113", + "filename": "Cigarettes", + "name": "Cigarettes", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Smoking" } ] }, { "category_name": "Nature", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Daily rainfall (in millimetres) was recorded over a 47-year period in Turramurra, Sydney, Australia. For each year, the wettest day was identified (that having the greatest rainfall). The data show the rainfall recorded for the 47 annual maxima.", "download": "http://www.statsci.org/data/oz/sydrain.txt", "filename": "sydrain", "name": "Annual Maximums of Daily Rainfall in Sydney", "number_format": 31, "remove_quotes": true, "separator": "auto", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "These data were collected in a cloud-seeding experiment in Tasmania between mid-1964 and January 1971. The rainfalls are period rainfalls in inches. 
\nSeeded\n - \nS = seeded, U = unseeded\nSeason\n - \nAutumn, Winter, Spring, Summer\nTE\n - \nrainfall in east target area\nTW\n - \nrainfall in west target area\nNC\n - \nrainfall in north control area\nSC\n - \nrainfall in south control area\nNWC\n - \nrainfall in north-west control area\n\n", "download": "http://www.statsci.org/data/oz/cloudtas.txt", "filename": "cloudtas", "name": "Cloud Seeding in Tasmania", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data are monthly averaged atmospheric pressure differences between Easter Island and Darwin, Australia. This difference drives the trade winds in the southern hemisphere. An annual cycle may be expected, and also longer cycles corresponding to the El Nino and to the Southern Oscillations. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPressure\n\nMonthly average atmospheric pressure differences\n", "download": "http://www.statsci.org/data/oz/enso.txt", "filename": "enso", "name": "Pressure Difference between Easter Island and Darwin", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Daily rainfall for Melbourne, from 1 January 1981 to 31 December 1990. 
Note that this series is 3 observations longer than the temperature series.", "download": "http://www.statsci.org/data/oz/melbrain.txt", "filename": "melbrain", "name": "Melbourne Daily Rainfall", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Daily minimum and maximum temperatures for Melbourne, from 1 January 1981 to 31 December 1990. The two February 29 leap days are excluded, so there are 10 x 365 = 3650 observations.", "download": "http://www.statsci.org/data/oz/melbtemp.txt", "filename": "melbtemp", "name": "Melbourne Temperatures", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Rainfall for each 6-day period for Adelaide from 1839 to 1977 inclusive. December 31 of the previous year is included in the non-leap years to make 61 6-day periods for each year. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1839 - 1977\n\nPeriod\n\n1 - 61 for each year\n\nRainfall\n\nRainfall in \n", "download": "http://www.statsci.org/data/oz/adelrain.txt", "filename": "adelrain", "name": "Adelaide Rainfall", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Daily 6am and 3pm temperatures for Brisbane for the decade 1977 - 1986. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDay\n\nDay as YearMonthDay\n\nTemp06\n\n6am Temperature in degrees Celsius x 10\n\nTemp15\n\n3pm Temperature in degrees Celsius x 10\n", "download": "http://www.statsci.org/data/oz/bristemp.txt", "filename": "bristemp", "name": "Brisbane Temperatures", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The columns in the data set represent the precipitation weighted mean concentrations of ions for the year 1986, for 47 sites in the United Kingdom. \n \nVariable\n \nDescription\n\nSite\n\nSite number \n\nRain\n\nRain (measured in mm) \n\nH\n\nH+ \n\nSO4\n\nSO4-2 \n\nNO3\n\nNO3- \n\nNH4\n\nNH4+ \n\nx\n\nx-coordinate (measured in cm) \n\ny\n\ny-coordinate (cm) \n\nThe measurement of NH4+ for site number 35 was not available and is represented by NA in the data set. The x- and y-coordinates were measured in cm from a map of the UK. ", "download": "http://www.statsci.org/data/general/rainuk.txt", "filename": "rainuk", "name": "Acid Rain in the UK", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Froliger and Kane measured the pH (a scale on which a value of 7 is neutral and values below 7 are acidic) of water collected from precipitation events in Allegheny County, Pennsylvania between December 20, 1973 and May 23, 1974. Display the distribution of these values and describe with words and numbers what you see. 
", + "download": "https://dasl.datadescription.com/download/data/3041", + "filename": "acid-rain", + "name": "Acid rain", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data give the average January Temperature (in degrees Fahrenheit) and Latitude (in degrees north of the equator) for 59 U.S. cities. How are they related? ", + "download": "https://dasl.datadescription.com/download/data/3114", + "filename": "City-climate", + "name": "City climate", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3115", + "filename": "City-temperatures", + "name": "City temperatures", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Global temperature from https://www.ncdc.noaa.gov/cag/data-info/global Global temperature anomaly data come from the Global Historical Climatology Network-Monthly (GHCN-M) data set and International Comprehensive Ocean-Atmosphere Data Set (ICOADS), which have data from 1880 to the present. These two datasets are blended into a single product to produce the combined global land and ocean temperature anomalies. 
The available timeseries of global-scale temperature anomalies are calculated with respect to the 20th century average, while the mapping tool displays global-scale temperature anomalies with respect to the 1981-2010 base period. For more information on these anomalies, please visit Global Surface Temperature Anomalies. CO2 from ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_annmean_mlo.txt DJIA from https://www.measuringworth.com\n\nScientists claim that changes in the mean global temperature are primarily due to changes in CO2 levels. Both trends are here from 1959 to 2016. For an alternative, the data includes the annual closing price of the Dow Jones Industrial Average. Can it predict global temperature?", + "download": "https://dasl.datadescription.com/download/data/3116", + "filename": "Climate-change-2016", + "name": "Climate change 2016", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Weather" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The ocean swell produces spectacular eruptions of water through a hole in the cliff at Kiama, about 120km south of Sydney, known as the Blowhole. The times at which 65 successive eruptions occurred from 1340 hours on 12 July 1998 were observed using a digital watch. \nJim Irish writes \nAnyone who has visited the Blowhole more than once knows that the rate and volume of eruptions varies. This variation occurs at several timescales. We might expect that part is explained by the tides, so that eruptions are more frequent and spectacular when the tide is very high, and eruptions obviously depend on the presence of a large ocean swell generated by prolonged strong winds over the ocean well offshore from Kiama. 
Hence, any stochastic model fitted to data observed over a short period of time is only applicable to that period, and perhaps a few hours either side of the observations. But we might infer from the model fitted to those data that a similar model applies more generally. ", "download": "http://www.statsci.org/data/oz/kiama.txt", "filename": "kiama", "name": "Kiama Blowhole Eruptions", "number_format": 31, "remove_quotes": true, "separator": "auto", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data records the length of rivers in the South Island of New Zealand. The lengths are given in kilometres. The second variable, FlowsInto, indicates whether the river flows into the Pacific Ocean (0) or the Tasman Sea (1). A map of the island's rivers is included here.", "download": "http://www.statsci.org/data/oz/nzrivers.txt", "filename": "nzrivers", "name": "Length of New Zealand Rivers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Data on the concentration of polychlorinated biphenyl (PCB) residues in a series of lake trout from Cayuga Lake, NY, were reported in Bache et al (1972). The ages of the fish were accurately known, because the fish were annually stocked as yearlings and distinctly marked as to year class. Each whole fish was mechanically chopped, ground, and thoroughly mixed, and 5-gram samples taken. The samples were treated and PCB residues in parts per million (ppm) were estimated using column chromatography. \nBates and Watts (1988) use a linear model \nlog(PCB) = b1 + b2 Age^(1/3) \nbut they remark that the nonlinear model \nlog(PCB) = b1 + b2 Age^q \nis slightly better. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of trout (years)\n\nPCB\n\nPCB concentration (ppm)\n", "download": "http://www.statsci.org/data/general/troutpcb.txt", "filename": "troutpcb", "name": "PCB Concentrations in Lake Trout", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Jaffe, Parker and Wilson have investigated the concentration of several hydrophobic organic substances (such as hexachlorobenzene, chlordane, heptachlor, aldrin, dieldrin, endrin) in the Wolf River in Tennessee. Measurements were taken downstream of an abandoned dump site that had previously been used by the pesticide industry to dispose of its waste products. \nIt was expected that these hydrophobic substances might have a nonhomogeneous vertical distribution in the river because of differences in density between these compounds and water and because of the adsorption of these compounds on sediments, which could lead to higher concentrations on the bottom. It is important to check this hypothesis because the standard procedure of sampling at six-tenths of the depth could miss the bulk of these pollutants if the distribution were not uniform. \nGrab samples were taken with a La Motte-Vandorn water sampler of 1 litre capacity at various depths of the river. This sampler consists of a horizontal plexiglas tube of 7 centimetres diameter and a plunger on each side which shuts the sampler when the sampler is at the desired depth. Ten surface, 10 mid-depth and 10 bottom samples were collected, all within a relatively short period. Until they were analysed the samples were stored in 1-quart mason jars at low temperature. 
\nIn the analysis of the samples, a 250-millilitre water sample was taken from each mason jar and was extracted with 1 millilitre of either hexanes or petroleum ether. A sample of the extract was then injected into a gas chromatograph and the output was compared against standards of known concentrations. The test procedure was repeated two more times, injecting different samples of the extract in the gas chromatograph. The average aldrin and hexachlorobenzene (HCB) concentrations (in nanograms per liter) in these 30 samples are given in the data.", "download": "http://www.statsci.org/data/general/wolfrive.txt", "filename": "wolfrive", "name": "Wolf River Pollution", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The following data from the Statistical Abstract of the United States give the number of accidental oil spills at sea and the amount of oil lost in these spills for the years 1973 - 1985. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nYear\n\nYear\n\nSpills\n\nNumber of spills\n\nOil\n\nAmount of oil lost (thousands of metric tonnes)\n\n\n\n", "download": "http://www.statsci.org/data/general/spills.txt", "filename": "spills", "name": "Accidental Oil Spills", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "These data refer to a survey of the fauna on the sea bed lying between the coast of northern Queensland and the Great Barrier Reef. The sampling region covered a zone which was closed to commercial fishing, as well as neighbouring zones where fishing was permitted. 
In view of the large numbers and types of species captured in the survey the catch was summarized as a score, on a log weight scale, which combines information across species. Two such scores are available. The details of the survey, and a full analysis of the data, are in Poiner et al (1997). \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nZone\n\nan indicator for the closed (1) and open (0) zones \n\nYear\n\nan indicator of 1992 (0) or 1993 (1) \n\nLatitude\n\nlatitude of the sampling position \n\nLongitude\n\nlongitude of the sampling position \n\nDepth\n\nbottom depth \n\nScore1\n\ncatch score 1 \n\nScore2\n\ncatch score 2 \n", "download": "http://www.statsci.org/data/oz/reef.txt", "filename": "reef", "name": "Prawn Trawling in the Great Barrier Reef", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Waters" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Groups of dolphins were observed off the coast of Iceland near Keflavik in 1998. The data here give the time of the day and the main activity of the group, whether travelling quickly, feeding or socializing. The dolphin groups varied in size - usually feeding or socializing groups were larger than travelling groups. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nActivity\n\nMain activity of group: travelling (Travel), feeding (Feed) or socializing (Social)\n\nPeriod\n\nTime of the day: Morning, Noon, Afternoon or Evening\n\nGroups\n\nNumber of groups observed\n\n\n\n", "download": "http://www.statsci.org/data/general/dolpacti.txt", "filename": "dolpacti", "name": "Activities of Dolphin Groups", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Cairns (1988) analysed the relation between population and foraging area for seabird colonies. The following table presents their data for 22 black-legged kittiwake (a northern gull) colonies of Scotland's Shetland and Orkney Islands. Area is km2 and Population is the number of breeding pairs. ", "download": "http://www.statsci.org/data/general/kittiwak.txt", "filename": "kittiwak", "name": "Kittiwake Colonies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Some handicapped people have access to trained monkey helpers that can perform household tasks like switching things on and off. This data set gives the number of tasks each of nine monkeys can perform along with the number of years the monkeys have been working with handicapped people. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName \n\nName of monkey\n\nYears \n\nNumber of years the monkey has worked with handicapped people\n\nTasks \n\nNumber of tasks the monkey can perform\n", "download": "http://www.statsci.org/data/general/monkeys.txt", "filename": "monkeys", "name": "Trained Monkeys", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Includes brain and body weight, life span, gestation time, time sleeping, and predation and danger indices for 62 species of mammals. Of interest is to predict the time spent sleeping and the proportion of sleep time in dream sleep. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nBodyWt\n\nbody weight (kg)\n\nBrainWt\n\nbrain weight (g)\n\nNonDreaming\n\nslow wave (\"nondreaming\") sleep (hrs/day)\n\nDreaming\n\nparadoxical (\"dreaming\") sleep (hrs/day)\n\nTotalSleep\n\ntotal sleep, sum of slow wave and paradoxical sleep (hrs/day)\n\nLifeSpan\n\nmaximum life span (years)\n\nGestation\n\ngestation time (days)\n\nPredation\n\npredation index (1-5)\n1 = minimum (least likely to be preyed upon); 5 = maximum (most likely to be preyed upon)\n\nExposure\n\nsleep exposure index (1-5)\n1 = least exposed (e.g. 
animal sleeps in a well-protected den); 5 = most exposed\n\nDanger\n\noverall danger index (1-5) (based on the above two indices and other information)\n1 = least danger (from other animals); 5 = most danger (from other animals)\n\n\n\n", "download": "http://www.statsci.org/data/general/sleep.txt", "filename": "sleep_", "name": "Sleep in Mammals", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Results of horse races at Eagle Farm, Brisbane, on 31 August 1998. The data, collected by Donald Forbes for his MS305 Data Analysis Project, give results for each horse in a sequence of 8 races. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPosition\n\nFinishing position\n\nStarters\n\nNumber of horses in race\n\nLast\n\nFinishing position in last race\n\nSince\n\nDays since last race\n\nNumber\n\nIdentifying number of horse in race\n\nCarried\n\nWeight carried\n\nWeight\n\nHandicap weight\n\nBarrier\n\nBarrier position at start of race\n\nDistance\n\nLength of race\n\nLengths\n\nNumber of lengths that horse finished from winner\n\nOdds\n\nStarting odds\n\nStarts\n\nNumber of races previously started in\n\nAge\n\nAge of horse in years\n\nRatio\n\nProportion of wins in previous starts\n", "download": "http://www.statsci.org/data/oz/horses.txt", "filename": "horses", "name": "Horse Racing at Eagle Farm", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the survival times (in 10 hour units) in a 3 x 4 factorial experiment, the factors being (a) three poisons and (b) four treatments. 
Each combination of the two factors is used for four animals, the allocation to animals being completely randomized. \n", "download": "http://www.statsci.org/data/general/poison.txt", "filename": "poison", "name": "Poison Experiment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data was collected by Peter Drew and Matt Seidemann, statistics students at the Queensland University of Technology, in a subject taught by Dr Margaret Mackisack. Here is their description of the data and its collection: \n\"As keen fishermen out and about on a fairly regular basis, the common arguments arise between anglers on the best rigging set up for various conditions. We decided that upon our next group outing that we would back up our opinions with hard statistical facts. Our interest led us to test the most obvious variables in the fishing rig. \n\"Of interest were firstly the rod length, as between fisherman there always tends to be a variety of rods of different sizes; secondly the type of line, in that the larger the line it would be logical that the weight would increase; thirdly the sinker weight and how it affected the casting distance. \n\"In deciding on the three variables a 2^3 factorial design seemed obvious and for our purposes seemed to be quite adequate. So the question was placed as to whether or not the above variables in any combination made any difference to the overall distance the line was cast. The rods used were 6ft and 7ft two piece boat rods, fitted with the same type of spinning reel. The variable sinkers were 8oz and 12oz round ball sinkers and the line used was either the 1kg or 2kg line of the same make. \n\"The experiment was carried out on a day that was close to windless thus lowering the relative influence of the wind. 
The series of casts was conducted by the same person as were the measurements thus giving uniformity to the total experiment. A break of five minutes was timed between casts so as to allow the caster to allocate the same amount of energy to each cast. The rods were not rigged by the caster; a rigger would set the rod up with a combination of sinker, line and rod, and an effort was made to keep the caster oblivious to the changes in the rig. \n\"The experiment was conducted on the rugby ovals on Oleria St, Brookside (a western suburb of Brisbane) adjacent to the RSL (Returned Serviceman League club), which for all intents and purposes would be classified as a level surface. A line was placed at one end of the field and from it the caster would cast the rod as he would given normal fishing conditions. A spotter who was also the measurer would mark the point of impact of the sinker and from it measure back to the line from which it was cast. The distance observed was subsequently rounded up to the nearest 0.5 of a metre. Two runs were made of each combination. \n\"Possible improvements: Because of the time the rigging took, both casts with each rig were done at the same time. If we did it again it would be better to use random numbers to decide the order of all sixteen casts.\" ", "download": "http://www.statsci.org/data/oz/fishing.txt", "filename": "fishing_", "name": "Fishing Rod Experiment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Four male and four female turtles had their plasma protein measured while they were well fed and after ten and twenty days of fasting. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-8\n\nSex\n\nMale or Female\n\nFed\n\nPlasma protein while well fed (mg/ml)\n\nFasted10\n\nPlasma protein after fasting 10 days\n\nFasted20\n\nPlasma protein after fasting 20 days\n", "download": "http://www.statsci.org/data/general/turtles.txt", "filename": "turtles", "name": "Plasma Protein of Fasting Turtles", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Frogs of four species had their oxygen consumption measured at two temperatures and two exercise levels. There were two frogs of each species at each temperature, and each of the two was measured both at rest and during forced exercise. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-16\n\nSpecies\n\n1-4\n\nTemperature\n\nLow or High\n\nRest\n\nOxygen consumption (ml O2/g/hr) at rest\n\nExercise\n\nOxygen consumption during exercise\n\n\n\n\n", "download": "http://www.statsci.org/data/general/frogs.txt", "filename": "frogs_", "name": "Oxygen Consumption of Frogs", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the age and the length of dugongs Dugong dugon (Müller) captured near Townsville in north Queensland, Australia. The lifespan of a dugong is 50-60 years.\nThese data were working estimates. In particular the method of determining the age of dugong has changed somewhat since the data were recorded. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge in years\n\nLength\n\nLength in metres\n\n\n\n", "download": "http://www.statsci.org/data/oz/dugongs.txt", "filename": "dugongs", "name": "Age and Length of Dugongs near Townsville", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the sound pressure of sonar signals (\"clicks\") from a dolphin at various ranges to target. The measurements were made off the coast of Iceland near Keflavik in 1998. The pressure measurement given is \nraw pressure + a Range \nwhere a is a known constant depending on the water density. Pressure is expected to increase with distance even after the adjustment. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRange \n\nDistance to dolphin in metres\n\nSoundPressure \n\nWater sound pressure adjusted for water density\n", "download": "http://www.statsci.org/data/general/dolphin.txt", "filename": "dolphin", "name": "Sound Pressure of Dolphin Sonar", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The observed responses are Geiger counter counts (times 10^-4) used to measure the amount of radioactively tagged sulfate drug in the blood of a baboon named Brunhilda after an injection of the drug. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nTime in hours since injection\n\nSulfate\n\nGeiger counter counts × 10^-4\n", "download": "http://www.statsci.org/data/general/brunhild.txt", "filename": "brunhild", "name": "Blood Sulfate in a Baboon Named Brunhilda", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The European rabbit Oryctolagus cuniculus is a major pest in Australia. A reliable method of age determination for rabbits caught in the wild would be of importance in ecological studies. In this study, the dry weight of the eye lens was measured for 71 free-living wild rabbits of known age. Eye lens weight tends to vary much less with environmental conditions than does total body weight, and therefore may be a much better indicator of age \nThe rabbits were born and lived free in an experimental 1.7 acre enclosure at Gungahlin, ACT. The birth data and history of each individual were accurately known. Rabbits in the enclosure depended on the natural food supply. In this experiment, 18 of the eye lenses were collected from rabbits that died in the course of the study from various causes such as coccidiosis, bird predation or starvation. The remaining 53 rabbits were deliberately killed, immediately after being caught in the enclosure or after they had been kept for some time in cages. The lenses were preserved and their dry weight determined. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of rabbit in days\n\nLens\n\nDry weight of eye lens in milligrams\n", "download": "http://www.statsci.org/data/oz/rabbit.txt", "filename": "rabbit", "name": "Age and Eye Lens Weight for Rabbits in Australia", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Insects were exposed to gaseous carbon disulphide for a period of 5 hours. Eight experiments were run with different concentrations of carbon disulphide. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDose\n\nDose of carbon disulphide\n\nExposed\n\nNumber of beetles exposed\n\nMortality\n\nNumber of beetles killed\n", "download": "http://www.statsci.org/data/general/beetles.txt", "filename": "beetles", "name": "Beetle Mortality", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Bill Venables writes: \nGroups of 20 snails were held for periods of 1, 2, 3 or 4 weeks in carefully controlled conditions of temperature and relative humidity. There were two species of snail, A and B, and the experiment was designed as a 4 by 3 by 4 by 2 completely randomized design. At the end of the exposure time the snails were tested to see if they had survived; the process itself is fatal for the animals. The object of the exercise was to model the probability of survival in terms of the stimulus variables, and in particular to test for differences between species. The data are unusual in that in most cases fatalities during the experiment were fairly small. 
\nSpecies\n \nSnail species A or B \nExposure\n \nExposure in weeks (4 levels) \nHumidity\n \nRelative humidity (4 levels) \nTemp\n \nTemperature in degrees Celsius (3 levels) \nDeaths\n \nNumber of deaths \nN \n \nNumber of snails exposed \n", "download": "http://www.statsci.org/data/oz/snails.txt", "filename": "snails_", "name": "Snail Mortality", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Activity of individually caged fiddler crabs under constant conditions for 225 consecutive hours (225 = 9*25 = 9*24 + 8). The activity scale is log(y+1) where y is mean minutes per hour. Examination of the data suggests that the logarithm was base 10. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nActivity\n\nlog(Minutes per hour+1)\n", "download": "http://www.statsci.org/data/general/fiddler.txt", "filename": "fiddler", "name": "Activity of Fiddler Crabs", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Tidal shrimps from the Brisbane River move up and down the tidal area (harbour pylon for example) in accordance with the movement of the tides. In this experiment shrimps were removed from their natural environment and isolated from environmental stimuli which would allow them to measure time. Their vertical position on an inclined slope was recorded every half hour starting 20 hours after removal and continuing for one week. Also recorded is the actual tide height during the same period, and six other measures of the shrimps' activity. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime \n\nHours since isolation\n\nVertical \n\nVertical displacement from original position\n\nY2 - Y7 \n\nOther activity measurements\n\nTide \n\nActual tide height\n", "download": "http://www.statsci.org/data/oz/shrimp.txt", "filename": "shrimp_", "name": "Movement of Tidal Shrimps in Isolation", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Monthly total number of pigs slaughtered in Victoria, from January 1980 to August 1995.", "download": "http://www.statsci.org/data/oz/pigs.txt", "filename": "pigs", "name": "Pigs Slaughtered in Victoria", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Seed predators and herbivores can operate as strong selective agents in the evolution of plant defence. In this context, Delpino (1886) posed the \"ant-guard\" hypothesis to explain the role of extrafloral nectaries on plants. Extrafloral nectaries (EFN), distributed on species in over 80 plant families, occur on vegetative organs and \"outer floral parts\" not directly associated with pollination. Basically, the hypothesis states that extrafloral nectar production attracts pugnacious \"bodyguards\" (usually ants) which by their foraging activities deter the activities of herbivorous insects and seed predators. \nSince its inception, the ant-guard hypothesis has remained controversial. 
A few careful studies have experimentally demonstrated that ants attending EFN protect plants (von Wettstein, 1889; Inouye and Taylor, 1979; Schemske, 1980) while several recent studies showed no effect (O’Dowd and Catchpole, 1983; Tempel, 1983; Boecklen, 1984). O’Dowd and Catchpole (1983), for example, found that attendance of ants at EFN deterred other insects from developing flowerheads but that their presence decreased neither the numbers of seed predators nor damage to developing flowerheads. The object of this paper is to describe the ant-insect interactions by means of a simple probability model. \nFull experimental detail is provided by O’Dowd and Catchpole (1983) but an outline is as follows. The plants studied were Helichrysum bracteatum. Three sites were chosen in clearings in the Tallaganda State forest, 40 km. southeast of Canberra, and at each site ten pairs of plants were studied. Plants within each pair were of similar initial size and less than 1 metre apart. Within each pair, ants were excluded from one plant, while the other served as a control. The plants were censused once a week for 17 weeks over the reproductive season (from initiation of flowerheads through the postflowering phase). The data recorded for each plant included the number of flowerheads (capitula), the number of capitula with ants, and the total number of other insects. Different species of ants (predominantly Iridomyrmex spp.) and other insects were observed, but in the data here are pooled within each general category. 
\nTo clarify: the first column (Week) lists the week the observation was made, the second (Index) lists the index given to the pair of plants observed, the third (AntCap) is the number of capitula on the plant with ant access, the fourth (ExcCap) is the number of capitula on the plant excluded from ant access, the fifth (Ants) is the number of capitula that have ants present on them, the sixth column (AntIns) is the number of insects on the plant with ant access, and the seventh (ExcIns) is the number of insects on the plant excluded from ant access. Index number 1-10 refer to Site 1, 11-20 to Site 2 and 21-30 to Site 3.", "download": "http://www.statsci.org/data/oz/ants.txt", "filename": "ants", "name": "Ant-Insect Interactions on Flowerheads", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A new type of heart valve has been developed and is implanted in 63 dogs that have been raised on various levels of exercise. The numbers of valve transplants that succeed are recorded. Is the proportion of successful implants the same for dogs on all exercise regimens? Is there a trend with amount of exercise in the proportion of successful implants? 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nExercise\n\nAmount of exercise: 1=None, 2=Slight, 3=Moderate, 4=Vigorous\n\nImplant\n\n1=Successful, 2=Unsuccessful\n\nFrequency\n\nNumber of dogs\n\n\n\n", "download": "http://www.statsci.org/data/general/exervalv.txt", "filename": "exervalv", "name": "Heart Valves in Dogs on Different Exercise Regimens", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give growth measurements on Tammar wallabies (Macropus eugenii). Each line is a set of measurements on an animal at a particular time. Most lengths are in tenths of millimetres. The data from some animals is very fragmentary. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nAnim\n\nAnimal number\n\nSex\n\n1=male, 2=female\n\nLoca\n\nLocation of animal\n\nLeng\n\nLength of animal (tenths of a millimetre)\n\nHead\n\nHead length\n\nEar\n\nEar Length\n\nArm\n\nArm length\n\nLeg\n\nLeg length\n\nPres\n\nPes (foot) length\n\nTail\n\nTail length\n\nWeight\n\nWeight (tenths of a gram)\n\nAge\n\nAge in days from birth\n", "download": "http://www.statsci.org/data/oz/wallaby.txt", "filename": "wallaby", "name": "Dryandra Tammar WallabyGrowth of Tammar Wallabies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Following the Second World War, D. L. Serventy carried out a detailed study of the lifecycle of the Tasmanian muttonbird (Puffinus tenuirostris, often called the short-tailed shearwater). The data here concerns the growth pattern of fledgling birds of this species. 
\nAfter the eggs hatch, the parent birds spend much time away from the nest, and with increasing time their returns become rarer and rarer. When they return the young bird feeds copiously, and there is very rapid weight-gain; whilst they are absent, the offspring loses weight. The result is not a smooth growth curve such as one finds in most measurements in developing animals and birds, but a 'sawtooth' effect. The data were collected in 1954 as weighings each morning of two fledgling chicks on Fisher Island, Bass Strait, and each set terminates on the day the chick left the nest. \nMuch of the interest in these curves comes not from the description they give of the weight of the chick, but from the information they contain on the feeding patterns of the parents. There are three obvious features of the data; the timing of the feeds and the size of the feeds when they occur, both of which represent aspects of the feeding pattern of the parents; and the loss in weight of the chicks between feeds. Henstridge and Tweedie (1984) proposed a model, similar to those used in storage theory, which describes each of these phenomena separately.", "download": "http://www.statsci.org/data/oz/muttonbi.txt", "filename": "muttonbi", "name": "Growth of Tasmanian Muttonbirds", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Dimensions in millimetres are given of two samples of jellyfish from Hawkesbury River in New South Wales, Australia. One of the samples came from Dangar Island and the other from Salamander Bay. The first column contains a \"D\" if the measurement came from Dangar Island and a \"S\" if it came from Salamander Bay. The dimensions measured were length and width. What can one learn from graphing the two principal components? 
Try graphing principal components of the logarithms of the measurements. Can the dimensions determine the location?", "download": "http://www.statsci.org/data/oz/jellfish.txt", "filename": "jellfish", "name": "Dimensions of Jellyfish", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A study was conducted concerning the counts of lesions produced on membranes of chick embryos by viruses of the pox group. The data give the numbers of lesions formed at a series of dilutions of the viral medium. \n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDilution\n\nDilution of viral medium, from 1 to 32\n\nCount\n\nNumber of lesions\n", "download": "http://www.statsci.org/data/general/pocklesi.txt", "filename": "pocklesi", "name": "Pock Lesions on Chick Embryos", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "This data comes from an experiment on induction of flowering of cyclamen. Plants of 4 varieties of cyclamen were subject to a combination of 6 temperature regimens and 4 levels of fertilization. The temperature regimens are combinations of five temperatures during the day (14, 16, 18, 20 and 26 degrees C) and four temperatures during the night (14, 16, 18 and 20 C). Not all the combinations of temperatures are present. The response is the number of flowers, which vary from 4 to 26, with mode 8. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nVariety\n\nVariety of cyclamen\n\nRegimem\n\nTemperature regimen (combination of the temperature during the day and the temperature during the night)\n\nDay\n\nTemperature during the day (Centigrade)\n\nNight\n\nTemperature during the night\n\nFertilizer\n\nLevel of fertilization\n\nFlowers\n\nNumber of flowers\n", "download": "http://www.statsci.org/data/general/cyclamen.txt", "filename": "cyclamen", "name": "Number of Cyclamen Flowers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "In an experiment where pregnant mice were exposed to the herbicide 2,4,5-T (the active component in Agent Orange), the number of fetal implants in utero were recorded. The data give the frequency distribution of implants at each of seven dose levels measured in mg/kg of body weight. \nOn days 6-14 after mating, pregnant dams were dosed by gavage with one of the doses of 2,4,5-T. Prior to giving birth, the dams were sacrificed and the number of viable, dead and reabsorbed foetuses in the uterus of the dam were determined. The data here gives the number of surviving viable implants. An outcome of zero implants cannot be distinguished from a non-pregnant outcome so any zero implant outcomes were excluded. 
\n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDose\n\nDose of 2,4,5-T in mg/kg/day\n\nImplants\n\nNumber of surviving implants\n\nFrequency\n\nNumber of mice with that number of implants\n", "download": "http://www.statsci.org/data/general/fetaimpl.txt", "filename": "fetaimpl", "name": "Fetal Implants in Mice Utero", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3074", + "filename": "Bird-Species-2013", + "name": "Bird-Species-2013", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Animals" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Risk and Sammarco (1991) found that the density of the Great Barrier Reef coral Porites lobata increases with distance from the Australian shore, due to differences between inshore and offshore environments. They made three measurements at each of nine reefs at various distances from the shore. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nReef\n\nName of reef\n\nDistance\n\nDistance to shore (km)\n\nDensity\n\nCoral head density (g/cm3)\n", "download": "http://www.statsci.org/data/oz/coralden.txt", "filename": "coralden", "name": "Density of Great Barrier Reef Coral Heads", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the volume (cubic feet), height (feet) and diameter (inches) (at 54 inches above ground) for a sample of 31 black cherry trees in the Allegheny National Forest, Pennsylvania. The data were collected in order to find an estimate for the volume of a tree (and therefore the timber yield), given its height and diameter. ", "download": "http://www.statsci.org/data/general/cherry.txt", "filename": "cherry", "name": "Volume of Black Cherry Trees", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data is from a dew-retting experiment in Ballarat 1942-43, in which flax was laid out under various climatic conditions and for various periods. Retting involves softening the flax stems by soaking in water, thus enabling the separation of the linen fibres from the wooden material by a process called scutching. The flax variety used was \"Liral Crown\". Two samples were taken from each trial and the ret loss, as a percentage, was calculated. 
The other three variables are the mean daily rainfall (in points), the retting period (in days) and the mean daily temperature (in degrees Fahrenheit).", "download": "http://www.statsci.org/data/oz/retloss.txt", "filename": "retloss", "name": "Ret Loss in Flax", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A production plant cost-control engineer is responsible for cost reduction. One of the costly items in his plant is the amount of water used by the production facilities each month. He decided to investigate water usage by collecting seventeen observations on his plant's water usage and other variables. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTemperature\n\nAverage monthly temperature (F)\n\nProduction\n\nAmount of production (M pounds)\n\nDays\n\nNumber of plant operating days in the month\n\nPersons\n\nNumber of persons on the monthly plant payroll\n\nWater\n\nMonthly water usage (gallons)\n", "download": "http://www.statsci.org/data/general/water.txt", "filename": "water_", "name": "Water Usage of Production Plant", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Ryan et al (1994) describe the data as follows: \nIn autumn, small winged fruit called samara fall off maple trees, spinning as they go. A forest scientist studied the relationship between how fast they fell and their \"disk loading\" (a quantity based on their size and weight). The samara disk loading is related to the aerodynamics of helicopters. \nThe data give the loadings and fall velocities for fruit from three trees. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTree\n\n1 to 3\n\nLoading\n\nDisk loading\n\nVelocity\n\nFall velocity\n", "download": "http://www.statsci.org/data/general/samara.txt", "filename": "samara", "name": "Fall Velocities for Samara Fruit", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The yield of pasture regrowth was measured together with the number of days since last grazing. The measurements were done on different experimental units so it is reasonable to assume the errors independent. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDays\n\nDays since last grazing\n\nYield\n\nYield of pasture\n", "download": "http://www.statsci.org/data/general/regrowth.txt", "filename": "regrowth", "name": "Pasture Regrowth after Grazing", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Herbicide bioassay is concerned with the reduction in plant growth as a function of the herbicide dose applied. This is of interest when developing new herbicides, assessing environmental effects on non-target species or estimating the residual herbicides in a treated soil before planting a new, herbicide susceptible crop. A typical experiment would comprise a series of doses ranging from ineffective to severely damaging to establish a dose-response relationship. In this experiment the callus area of a tissue culture of Brassica napus was measured corresponding to different doses of a sulfonylurea herbicide, metsulfuron methyl. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nChlorsulfuron\n\nConcentration of herbicide in nmol/L\n\nCallus\n\nLogarithm of callus area\n", "download": "http://www.statsci.org/data/general/brassica.txt", "filename": "brassica", "name": "Response of Brassica napus to Chlorsulfuron", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data concerns the underground root system of eight separate apple trees. Three different root stocks are considered (Mark, MM106 and M26) and two plant spacing (4x2 meters and 5x3 meters). For each plant, soil core sampling units taken have been classified as belonging to an inner or outer zone. The response variable is the density of fine roots, also called the root length density, which can have zeros as well as continuous positive values. There are 511 observations, of which 193 or 38% have a zero response. \nThe design is not a full factorial design: plants 1 and 2 are tested only with the Mark root stock and at a spacing of 5x3; plants 3 and 4 are tested only with Mark root stock at a spacing of 4x2; plants 5 and 6 are tested only with root stock MM106 at a spacing of 5x3; and plants 7 and 8 are tested only with M26 root stock at a spacing of 4x2. The Mark root stock is tested at both plant spacings but the MM106 only at 5x3 and M26 only at 4x2. So there are four unique treatment combinations: Mark stock at 5x3 and 4x2, MM106 at 5x3, and M26 at 4x2. \nIt is of interest to (1) compare effects of spacing within Mark rootstock, (2) compare root stocks within same spacing and (3) to look for any difference in RLD between inner and outer zones. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPlant \n\n1 to 8\n\nStock\n\nRoot stock: Mark, MM106 or M26\n\nSpacing\n\nPlant spacing: 5x3 or 4x2 meters\n\nZone\n\nZone relative to the plant the soil core is taken from: Inner or Outer\n\nRLD\n\nRoot length density in cm/cm3\n", "download": "http://www.statsci.org/data/oz/fineroot.txt", "filename": "fineroot", "name": "Root Length Density of Apple Trees", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Plants" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Data were collected from a mine in Cobar, NSW, Australia. At each of 38 sampling points, several measurements were taken, one of which is the 'true-width' of an ore-bearing rock layer. Also given are the co-ordinates t1 and t2 of the data sites. Green and Silverman (1994) use this data set to illustrate thin-plate splines for fitting a smooth surface.", "download": "http://www.statsci.org/data/oz/ore.txt", "filename": "ore", "name": "Wide of Ore-Bearing Layer", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The proportions of sand, silt and clay in soil samples are given for 8 contiguous sites. The sites extended over the crest and flank of a low rise in a valley underlain by marl near Albudeite in the province of Murcia, Spain. The sites were small areas of ground surface of uniform shape internally and delimited by relative discontinuities externally. Soil samples were obtained for each site at 11 random points within a 10m by 10m area centred on the mid-point of the site. All samples were taken from the same depth. 
The data give the sand, silt and clay content of each sample, expressed as a percentage of the total sand, silt and clay content. \nThe purpose of the study by Wright and Wilson (1979) was to determine whether the sites could be differentiated on the basis of their soil composition. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSite \n\n1-8\n\nSand \n\nPercent sand\n\nSilt\n\nPercent silt\n\nClay\n\nPercent clay\n", "download": "http://www.statsci.org/data/general/murcia.txt", "filename": "murcia", "name": "Composition of Soil from Murcia Province, Spain", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Geology" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3094", + "filename": "Carbon-footprint", + "name": "Carbon footprint", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3095", + "filename": "Carbon-footprint-2015", + "name": "Carbon footprint 2015", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Other" } ] }, { "category_name": "Statistics", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the year of founding for 40 New Zealand wineries.", "download": "http://www.statsci.org/data/oz/wineries.txt", "filename": 
"wineries", "name": "Founding Dates of NZ Wineries", "number_format": 31, "remove_quotes": true, "separator": "auto", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. ", "download": "http://www.statsci.org/data/general/auction.txt", "filename": "auction", "name": "Selling Price of Antique Grandfather Clocks", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The two columns of the data are the prices and year purchased for 124 Mazda cars, as taken from the classified section of the Melbourne Age during the course of 1991. Hence the age of the car at the time can be calculated and used to model car price. ", "download": "http://www.statsci.org/data/oz/mazdas.txt", "filename": "mazdas", "name": "Age and Price of Mazda Cars", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data show the capital value and annual rental value of 96 domestic properties in Auckland in 1991. 
The aim was to explore their relationship in the hope of being able to predict capital value from rental value, thus the latter is the explanatory variable in this case.", "download": "http://www.statsci.org/data/oz/rentcap.txt", "filename": "rentcap", "name": "Capital and Rental Values of Auckland Properties", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of the clock (years)\n\nBidders\n\nNumber of individuals participating in the bidding\n\nPrice\n\nSelling price (pounds sterling)\n", "download": "http://www.statsci.org/data/general/auction.txt", "filename": "auction_", "name": "Selling Price of Antique Grandfather Clocks", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data were collected to study the variation in rent paid in 1977 for agricultural land planted to alfalfa in Minnesota. 
The data include: \n\n\nVariable\n\nDescription\n\n\nRent\n \naverage rent per acre planted to alfalfa\nAllRent\n \naverage rent paid for all tillable land\nCows\n \ndensity of dairy cows (number per square mile)\nPasture\n \nproportion of farmland used as pasture\nLiming\n \nYes if liming is required to grow alfalfa; No otherwise\n", "download": "http://www.statsci.org/data/general/landrent.txt", "filename": "landrent", "name": "Rent for Land Planted to Alfalfa", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Monthly observations on various share price and financial variables were recorded from October 1991 to August 1997. Data collected by Francine Pritchard and Glen Dixon for their MS305 data analysis project in 1997. \n\n\nVariable\n\nDescription\n\n\nBank\n\nShare Price Index\nAllOrds\n\n\nDevelop\n\n\nMining\n\n\nGold\n\n\nBuild\n\n\nProp\n\n\nIndust\n\n\nEnergy\n\n\nFinance\n\n\nResource\n\n\nTransport\n\n\nRetail\n\n\nUnemploy\n\nUnemployment Rate\nCPI\n\nConsumer Price Index\nBankBill\n\n90 Day Bank Bill Interest Rate\n", "download": "http://www.statsci.org/data/oz/bankbill.txt", "filename": "bankbill", "name": "90 Day Bank Bills", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The following data was collected in the 1960s at a house in south-east England. The weekly gas consumption (in 1000 cubic feet) and the average outside temperature (in degrees Celsius) was recorded for 26 weeks before and 30 weeks after cavity-wall insulation had been installed. The house thermostat was set at 20°C throughout. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nInsulate\n\nBefore or After\n\nTemp\n\nAverage outside temperature (C)\n\nGas\n\nGas consumption (1000's of cubic feet)\n", "download": "http://www.statsci.org/data/general/insulgas.txt", "filename": "insulgas", "name": "House Insulation and Gas Consumption", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Age specific term life premium rates for a sum insured of $50,000 are given in the table. The first column is the age of insured, the next two columns are the rates for male smokers and non-smokers, and the last two columns are the rates for female smokers and non-smokers. The four separate sets of points may be plotted and cubic spline regression used to fit them.", "download": "http://www.statsci.org/data/oz/insure.txt", "filename": "insure", "name": "Insurance Premiums", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the Canadian automobile insurance experience for policy years 1956 and 1957 as of June 30, 1959. The data includes virtually every insurance company operating in Canada and was collated by the Statistical Agency (Canadian Underwriters' Association - Statistical Department) acting under instructions from the Superintendent of Insurance. The data given here is for private passenger automobile liability for non-farmers for all of Canada excluding Saskatchewan. \nThe variable Merit measures the number of years since the last claim on the policy. The variable Class is a collation of age, sex, use and marital status. 
The variables Insured and Premium are two measures of the risk exposure of the insurance companies. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMerit\n\nMerit Rating:\n3 - licensed and accident free 3 or more years\n2 - licensed and accident free 2 years\n1 - licensed and accident free 1 year\n0 - all others\n\nClass\n\n1 - pleasure, no male operator under 25\n2 - pleasure, non-principal male operator under 25\n3 - business use\n4 - unmarried owner or principal operator under 25\n5 - married owner or principal operator under 25\n\nInsured\n\nEarned car years\n\nPremium\n\nEarned premium in 1000's\n(adjusted to what the premium would have been had all cars been written at 01 rates)\n\nClaims\n\nNumber of claims\n\nCost\n\nTotal cost of the claim in 1000's of dollars\n", "download": "http://www.statsci.org/data/general/carinsca.txt", "filename": "carinsca", "name": "Canadian Automobile Insurance Claims for 1957-1958", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give details of third party motor insurance claims in Sweden for the year 1977. \n\"In Sweden all motor insurance companies apply identical risk arguments to classify customers, and thus their portfolios and their claims statistics can be combined. The data were compiled by a Swedish Committee on the Analysis of Risk Premium in Motor Insurance. 
The Committee was asked to look into the problem of analyzing the real influence on claims of the risk arguments and to compare this structure with the actual tariff.\" \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nKilometres \n\nKilometres travelled per year\n1: < 1000\n2: 1000-15000\n3: 15000-20000\n4: 20000-25000\n5: > 25000\n\nZone \n\nGeographical zone\n1: Stockholm, Göteborg, Malmö with surroundings\n2: Other large cities with surroundings\n3: Smaller cities with surroundings in southern Sweden\n4: Rural areas in southern Sweden\n5: Smaller cities with surroundings in northern Sweden\n6: Rural areas in northern Sweden\n7: Gotland \n\nBonus\n\nNo claims bonus. Equal to the number of years, plus one, since last claim\n\nMake\n\n1-8 represent eight different common car models. All other models are combined in class 9\n\nInsured\n\nNumber of insured in policy-years\n\nClaims\n\nNumber of claims\n\nPayment\n\nTotal value of payments in Skr\n\n\n\n", "download": "http://www.statsci.org/data/general/motorins.txt", "filename": "motorins", "name": "Third Party Motor Insurance in Sweden", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the average claims for damage to the owner's car for privately owned and comprehensively insured vehicles in Britain in 1975. Averages are given in pounds sterling adjusted for inflation. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nOwnerAge\n\nPolicy-holder's age in years, categorized into 8 levels\n\nModel\n\nType of car, in 4 groups\n\nCarAge\n\nVehicle age in years, categorized into 4 levels\n\nNClaims\n\nNumber of claims\n\nAveCost\n\nAverage cost of each claim in pounds\n", "download": "http://www.statsci.org/data/general/carinsuk.txt", "filename": "carinsuk", "name": "British Car Insurance Claims for 1975", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Monthly data relating to hotels, motels and guesthouses in Victoria, from January 1980 to June 1995. First column: total number of room nights occupied; Second column: total takings from accommodation. ", "download": "http://www.statsci.org/data/oz/motel.txt", "filename": "motel", "name": "Hotels, Motels and Guesthouses in Victoria", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data list the CPI (Consumer Price Index) figures for five countries from 1985 to 1994. The countries are Australia, Canada, New Zealand, the United Kingdom and the United States. 
Each index is based on the December Quarter 1993 (1000).", "download": "http://www.statsci.org/data/oz/cpifive.txt", "filename": "cpifive", "name": "CPI for Five Countries", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Quarterly CPI indices for Brisbane for food, clothing, housing etc, from June 1972 to September 1997. \nThe groups are: Food, Clothing, Housing, Household equipment and operation, Transportation, Tobacco and Alcohol, Health and personal care, Recreation and education, and All groups. The CPI are standardized so that the year 1989-90 is 100.0. ", "download": "http://www.statsci.org/data/oz/cpibris.txt", "filename": "cpibris", "name": "Brisbane Consumer Price Indices", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Quarterly price indices for established homes in Australian capital cities, from June 1986 to June 1997. The price indices are standardized so that the year 1989-1990 is 100.0 for each city. 
", "download": "http://www.statsci.org/data/oz/houses.txt", "filename": "houses", "name": "House Price Indexes", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - } - ], - "subcategory_name": "Economics" - }, - { - "datasets": [ + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) 
", - "download": "http://www.statsci.org/data/oz/rugby.txt", - "filename": "rugby", - "name": "Time of Passages of Play in Rugby", + "description": "To shorten the time it takes him to make his favorite pizza, a student designed an experiment to test the effect of sugar and milk on the activation times for baking yeast. Specifically, he tested four different recipes and measured how many seconds it took for the same amount of dough to rise to the […] ", + "download": "https://dasl.datadescription.com/download/data/3042", + "filename": "activating-baking-yeast", + "name": "Activating baking yeast", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Mark Taylor was Captain of the Australian test cricket team from May 1994 until February 1999. By the middle of 1997, the Australian team has won its 7 consecutive international test series, making Taylor the most successful Australian Captain in history. However his poor batting form from mid 1996 to mid 1997 gave the Australian selectors a dilemma in deciding whether his excellent Captaincy made up for the run of poor scores off his own bat. \nThe data below gives Mark Taylor's test scores from the middle of 1989 to the middle of 1995, a period over which he was batting well. Scores were made in Australia's first or second innings of each match. Sometimes Australia was not required to bat twice, in which case the second innings is marked as missing. There are also a number of `not outs'.", - "download": "http://www.statsci.org/data/oz/taylor.txt", - "filename": "taylor_", - "name": "Mark Taylor's Test Cricket Scores", + "description": "The American International Group (AIG) was once the 18th largest corporation in the world. 
By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […] ", + "download": "https://dasl.datadescription.com/download/data/3046", + "filename": "AIG-daily", + "name": "AIG daily", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Sydney-Hobart yacht race starts from Sydney Harbour on Boxing day (December 26) and finishes several days later in Hobart. It is a 630 nautical mile ocean race. The data give the winning times from 1945 to 1993, as they appeared in the Sydney Morning Herald on 24 December, 1994, plus the winning times for 1994 to 1997. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYacht\n\nName of winning yacht\n\nYear\n\nYear\n\nDays\n\nDays unit of winning time\n\nHours\n\nHours unit of winning time\n\nMinutes\n\nMinutes unit of winning time\n\nTime\n\nWinning time in minutes (should match time in Days, Hours and Minutes)\n", - "download": "http://www.statsci.org/data/oz/sydhob.txt", - "filename": "sydhob", - "name": "Sydney to Hobart Yacht Race Winning Times", + "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. 
Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […]", + "download": "https://dasl.datadescription.com/download/data/3047", + "filename": "AIG-monthly", + "name": "AIG monthly", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Winning heights or distances (inches) for the High Jump, Discus and Long Jump events at the Olympics up to 1996. ", - "download": "http://www.statsci.org/data/general/olympic.txt", - "filename": "olympic", - "name": "Olympic Records for High Jump, Discus and Long Jump", + "description": "A sample of model 2011 cars from an online information service collected to see how fuel efficiency (as highway mpg) relates to the cost (MSRP) ", + "download": "https://dasl.datadescription.com/download/data/3050", + "filename": "All-the-efficiency", + "name": "All the efficiency", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the number of medals won by each medal-winning country in the 1992 Summary Olympic Games in Barcelona, Spain, and the 1994 Winter Olympic Games in Lillehammer, Norway. Also given is the population and latitude of each country. Griffiths et al write: \n... the media spent a lot of time discussing the number of medals won by each country's athletes. The implication was that the comparison was of some importance. However, larger countries would be expected to win more medals than smaller countries, simply because of their larger populations. \n... 
some viewers, especially those from the smaller countries, felt that the number of medals should be standardised to account for the very wide range of populations, and that a per capita number of medals for a country was a fairer comparison. Others felt that this was unfair to the countries with larger populations - that having twice as many people did not lead to twice as many medals. If standardisation is performed adequately, there should be no systematic relationship between the adjusted medal count and population. \nAlso countries further from the equator might be expected to do better in the winter olympics. \nThe data is incomplete in that countries with no medals are not included. These would be mostly smaller population countries. ", - "download": "http://www.statsci.org/data/oz/medals.txt", - "filename": "medals", - "name": "Olympic Medals", + "description": "The price of delicious apples and regular gas are components of the Consumer Price Index. The data give those prices monthly for the year 2006 ", + "download": "https://dasl.datadescription.com/download/data/3055", + "filename": "Apples-and-gas", + "name": "Apples and gas", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "You have decided to invest in a bond fund and plan to limit your choice of funds to Morningstar “medalist” funds. But now you must choose between a taxable fund and a municipal bond fund that is at least partially tax-free. Which is better? 
Here are the % returns for the three-year period leading up", + "download": "https://dasl.datadescription.com/download/data/3080", + "filename": "Bond-funds", + "name": "Bond funds", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Number of sales people working in a bookstore and sales (in $1000) that day. These are realistic but invented data. ", + "download": "https://dasl.datadescription.com/download/data/3081", + "filename": "Bookstore-sales", + "name": "Bookstore sales", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "In 2015, the website NewGeography.com listed its ranking of the best cities for job growth in the United States. Nonfarm employment is also provided", + "download": "https://dasl.datadescription.com/download/data/3082", + "filename": "Boomtowns-2015", + "name": "Boomtowns 2015", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Home prices in two neighborhoods near San Francisco. Palo Alto is an older neighborhood and Foster City, a newer one. 
How do prices compare?", + "download": "https://dasl.datadescription.com/download/data/3104", + "filename": "CA-House-Prices", + "name": "CA House Prices", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3097", + "filename": "Car-discounts", + "name": "Car discounts", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3098", + "filename": "Car-origins", + "name": "Car origins", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The S&P/Case-Shiller Home Price Indices track changes in the value of residential real estate nationally and in 20 metropolitan regions. (Some of these indices are actually traded on the Chicago Mercantile Exchange.) 
The data set Case-Shiller by City gives the monthly index values for each of the 20 cities tracked by the Case-Shiller index and […] ", + "download": "https://dasl.datadescription.com/download/data/3102", + "filename": "Case-Shiller-by-city", + "name": "Case-Shiller by city", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Beginning in 2017, public companies will be required to disclose the ratio of CEO pay to median worker pay. The Glassdoor Economic Research Blog has published the data for 2014. The data includes CEO identities, companies, CEO compensation, median worker compensation (compiled by Glassdoor), and the ratio of CEO to worker compensation.", + "download": "https://dasl.datadescription.com/download/data/3105", + "filename": "CEO-Compensation-2014", + "name": "CEO Compensation 2014", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3106", + "filename": "CEO-Salary-2012", + "name": "CEO Salary 2012", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Coffee is the world’s second largest\nlegal export commodity (after oil) and is the second largest\nsource of foreign exchange for developing nations. 
The\nUnited States consumes about one-fifth of the world’s coffee.\nThe International Coffee Organization (ICO) computes\na coffee price index using Colombian, Brazilian, and\na mixture of other coffee data. Data are provided for the\nmonthly average ICO price index (in $US) from Jan 2009 to December 2017", + "download": "https://dasl.datadescription.com/download/data/3119", + "filename": "Coffee-prices-2017", + "name": "Coffee-prices-2017", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The cost of a variety of common items in 576 cities around the world in $, adjusted so that New York, U.S.A. is 100.", + "download": "https://dasl.datadescription.com/download/data/3120", + "filename": "COLall-2016", + "name": "COLall 2016", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Facts about companies selected from the Forbes 500 list for 1986. This is a 1/10 systematic sample from the alphabetical list of companies. 
The Forbes 500 includes all companies in the top 500 on any of the criteria, and thus has almost 800 companies in the list.", + "download": "https://dasl.datadescription.com/download/data/3125", + "filename": "Companies", + "name": "Companies", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Facts about companies selected from the Forbes 500 list for 2000", + "download": "https://dasl.datadescription.com/download/data/3595", + "filename": "Companies-Quickstart", + "name": "Companies Quickstart", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3129", + "filename": "Consumer-spending", + "name": "Consumer spending", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3130", + "filename": "Consumer-spending-post-holiday", + "name": "Consumer spending post holiday", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Economics" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The 
following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) ", + "download": "http://www.statsci.org/data/oz/rugby.txt", + "filename": "rugby", + "name": "Time of Passages of Play in Rugby", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Mark Taylor was Captain of the Australian test cricket team from May 1994 until February 1999. By the middle of 1997, the Australian team has won its 7 consecutive international test series, making Taylor the most successful Australian Captain in history. 
However his poor batting form from mid 1996 to mid 1997 gave the Australian selectors a dilemma in deciding whether his excellent Captaincy made up for the run of poor scores off his own bat. \nThe data below gives Mark Taylor's test scores from the middle of 1989 to the middle of 1995, a period over which he was batting well. Scores were made in Australia's first or second innings of each match. Sometimes Australia was not required to bat twice, in which case the second innings is marked as missing. There are also a number of `not outs'.", + "download": "http://www.statsci.org/data/oz/taylor.txt", + "filename": "taylor_", + "name": "Mark Taylor's Test Cricket Scores", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The Sydney-Hobart yacht race starts from Sydney Harbour on Boxing day (December 26) and finishes several days later in Hobart. It is a 630 nautical mile ocean race. The data give the winning times from 1945 to 1993, as they appeared in the Sydney Morning Herald on 24 December, 1994, plus the winning times for 1994 to 1997. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYacht\n\nName of winning yacht\n\nYear\n\nYear\n\nDays\n\nDays unit of winning time\n\nHours\n\nHours unit of winning time\n\nMinutes\n\nMinutes unit of winning time\n\nTime\n\nWinning time in minutes (should match time in Days, Hours and Minutes)\n", + "download": "http://www.statsci.org/data/oz/sydhob.txt", + "filename": "sydhob", + "name": "Sydney to Hobart Yacht Race Winning Times", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Winning heights or distances (inches) for the High Jump, Discus and Long Jump events at the Olympics up to 1996. ", + "download": "http://www.statsci.org/data/general/olympic.txt", + "filename": "olympic", + "name": "Olympic Records for High Jump, Discus and Long Jump", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data give the number of medals won by each medal-winning country in the 1992 Summer Olympic Games in Barcelona, Spain, and the 1994 Winter Olympic Games in Lillehammer, Norway. Also given is the population and latitude of each country. Griffiths et al write: \n... the media spent a lot of time discussing the number of medals won by each country's athletes. The implication was that the comparison was of some importance. However, larger countries would be expected to win more medals than smaller countries, simply because of their larger populations. \n... 
some viewers, especially those from the smaller countries, felt that the number of medals should be standardised to account for the very wide range of populations, and that a per capita number of medals for a country was a fairer comparison. Others felt that this was unfair to the countries with larger populations - that having twice as many people did not lead to twice as many medals. If standardisation is performed adequately, there should be no systematic relationship between the adjusted medal count and population. \nAlso countries further from the equator might be expected to do better in the winter olympics. \nThe data is incomplete in that countries with no medals are not included. These would be mostly smaller population countries. ", + "download": "http://www.statsci.org/data/oz/medals.txt", + "filename": "medals", + "name": "Olympic Medals", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "This data set was assembled by Rowan Todd and Mark McNaughton, two students studying Statistics at QUT in a class taught by Dr Margaret Mackisack. For a class project they decided to investigate the effect on football game attendance of various covariates. They collected data involving Saturday Australian Football League (AFL) matches at the Melbourne Cricket Ground (MCG). They looked only at matches during the normal home and away season (i.e. not including finals). They used statistics from all such games in 1993 and 1994 (nineteen relevant matches in 1993 and twenty-two in 1994). The response variable measured was attendance at the MCG, and after consideration, they came up with the following covariates: \n\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMCG \n\nAttendance at the MCG in 1000's.\n\nTemp \n\nTemperature. 
The forecast maximum temperature on the day of the match, in whole degrees C, found in The Weekend Australian.\n\nOther\n\nAttendance at other matches in 1000's. The sum of the attendances at other AFL matches in Melbourne and Geelong on the same day as the match in question.\n\nMembers\n\nMembership. The sum of the memberships of the two clubs whose teams were playing the match in question in 1000's.\n\nTop50\n\nNumber of players from the top fifty. The number of players in the top 50 in the AFL who happened to be playing in the match in question.\n\nDate\n\nDate of the match in the format dd/mm/yy.\n\nHome\n\nAbbreviation for home team.\n\nAway\n\nAbbreviation for away team.\n", "download": "http://www.statsci.org/data/oz/afl.txt", "filename": "afl", "name": "AFL Crowd Attendance at the MCG", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. 
The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) \n", "download": "http://www.statsci.org/data/oz/rugby.txt", "filename": "rugby_", "name": "Time of Passages of Play in Rugby", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the record-winning times for 35 hill races in Scotland, as reported by Atkinson (1986). The distance travelled and the height climbed in each race is also given. The data contains a known error - Atkinson (1986) reports that the record for Knock Hill (observation 18) should actually be 18 minutes rather than 78 minutes. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRace\n\nName of race\n\nDistance\n\nDistance covered in miles\n\nClimb\n\nElevation climbed during race in feet\n\nTime\n\nRecord time for race in minutes\n", "download": "http://www.statsci.org/data/general/hills.txt", "filename": "hills_", "name": "Scottish Hill Races", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Investigators studied physical characteristics and ability in 13 football punters. Each volunteer punted a football ten times. 
The investigators recorded the average distance for the ten punts, in feet. They also recorded the average hang time (time the ball is in the air before the receiver catches it) for the ten punts, in seconds. In addition, the investigators recorded five measures of strength and flexibility for each punter: right leg strength (pounds), left leg strength (pounds), right hamstring muscle flexibility (degrees), left hamstring muscle flexibility (degrees), and overall leg strength (foot-pounds). From the study \"The relationship between selected physical performance variables and football punting ability\" by the Department of Health, Physical Education and Recreation at the Virginia Polytechnic Institute and State University, 1983. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in feet\n\nHang\n\nTime in air in seconds\n\nR_Strength\n\nRight leg strength in pounds\n\nL_Strength\n\nLeft leg strength in pounds\n\nR_Flexibility\n\nRight leg flexibility in degrees\n\nL_Flexibility\n\nLeft leg flexibility in degrees\n\nO_Strength\n\nOverall leg strength in pounds\n", "download": "http://www.statsci.org/data/general/punting.txt", "filename": "punting", "name": "American Football Punters", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Data on 102 male and 100 female athletes collected at the Australian Institute of Sport, courtesy of Richard Telford and Ross Cunningham. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSport\n\nSport\n\nSex\n\nmale or female\n\nHt\n\nHeight in cm\n\nWt\n\nWeight in kg\n\nLBM\n\nLean body mass\n\nRCC\n\nRed cell count\n\nWCC\n\nWhite cell count\n\nHc\n\nHematocrit\n\nHg\n\nHemoglobin\n\nFerr\n\nPlasma ferritin concentration\n\nBMI\n\nBody mass index = weight/height^2\n\nSSF\n\nSum of skin folds\n\n%Bfat\n\n% body fat\n\n\n\n", "download": "http://www.statsci.org/data/oz/ais.txt", "filename": "ais_", "name": "Australian Institute of Sport", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data was collected by Grant Elliott, a statistics student at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. Here is his description of the data and its collection: \nLiving at a squash court spurred on the idea of this experiment. Frustrated playing squash one night, I thought that the squash ball I was playing with seemed to bounce and react differently to what I was previously used to. So I conducted this experiment on the squash ball, looking at the type of ball, temperature of the ball and the age of the ball. \nBall type: In this experiment I used a 'yellow dot' squash ball and a 'double x' squash ball. A 'yellow dot' is super slow and a 'double x' is termed extra super slow. \nTemperature: When playing with a squash ball it tends to heat up. So I took it to extremes where I had 'room temperature' and 'playing temperature'. To duplicate 'playing temperature' the ball was placed in a cup of boiling water for 45 sec. \nAge: I expected age to be my most significant factor. Squash balls, being a sealed ball, shouldn't vary when they get older, so I used a new ball and compared it to an old ball. 
\nProcedure: I first thought of dropping the balls from a set height and seeing how far they bounced against a tape measure. This idea was scrapped as too much error came into it because you couldn't accurately measure when the maximum height of the bounce was. I then thought of a ball machine. I set the ball machine up and measured how far back did the ball come off the front wall when shot out of the ball machine. This eliminated a lot of varying in my figures as the ball machine shoots the balls out at roughly the same speed and trajectory. It doesn't take all the varying out as I wouldn't know whether the ball machine does shoot it out at exactly the same speed, but it keeps variation to a minimum. \nCriticism: Measuring the distance from the wall was done by my friend and I. We both would watch from different angles and would see where the ball landed. This means our figures are probably out by a couple of centimetres. When the balls were dropped into the water I forgot to take some of them out after 45 sec. Also with some I moved them around in the water to get the heat distributed evenly but others I forgot to move as I was collecting and organising the next ball. Another criticism is the temperature of the water. I put new boiling water into the cup after 4 balls had been in it. Therefore the last ball to go in wouldn't be the same temperature as the first ball.", "download": "http://www.statsci.org/data/oz/squash.txt", "filename": "squash", "name": "Squash Ball Experiment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The dynamic and repetitive nature of running means that runners are particularly prone to over-use injuries such as lower back pain. 
Chronic pain is often caused by muscle imbalances, which result in faulty alignment of and abnormal stresses applied to the spinal column. Muscle imbalances originate as adaptations in motor control due to pain or external stimuli, and are then reinforced and preserved by repetition. \nThis study, conducted by Physiotherapy student Andrew Mooney, examined the flexibility of four major muscle groups associated with movement of the hip, with particular attention to imbalances between the left and right sides or between the dominant and non-dominant sides. \nA total of 33 male subjects were included in the study. The subjects were divided into three groups: 11 runners with low back pain, 11 runners without low back pain and 11 sedentary individuals without low back pain. (Runners were recruited from the Ashgrove and Toowong athletics clubs, non runners from the University of Queensland and the general community. Runners with lower back pain were recruited first. Once this subject group was tested, subjects for the two control groups were recruited to match the runners with low back pain according to age, height and weight.) \nThe muscle groups examined were \nthe iliopsoas, \nthe rectus femoris, \nthe tensor fascia lata/iliotibial band (ITB/TFL), and \nthe hamstrings \nFor each muscle group, two measures of flexibility were used. The first, relative flexibility, was related to the range of movement of the joint before postural compensations occurred, and the second was a measure of the maximal functional length of the muscle. Relative flexibility and functional length were measured for each muscle group on both the left and right sides of the body. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject number, 1 - 33\n\nGroup\n\nPain, NoPain or Sedentary\n\nMatch\n\n1 - 11, indicating matched triples\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight of subject in cm\n\nWeight\n\nWeight of subject in kg\n\nDistance\n\nType of running event: Sprint, middle distance (Mid) or long distance (Long)\n\nYears\n\nNumber of years running\n\nDominant\n\nDominant side, Left or Right\n\nDF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on dominant side in degrees. Positive numbers indicate above the horizontal, negative numbers below the horizontal.\n\nDF.Rectus\n\nRelative flexibility of rectus femoris muscle on dominant side in degrees\n\nDF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on dominant side in degrees\n\nDF.Hamstring\n\nRelative flexibility of hamstring muscles on dominant side in degrees\n\nDL.Iliopsoas\n\nFunctional length of iliopsoas muscle on dominant side\n\nDL.Rectus\n\nFunctional length of rectus femoris muscle on dominant side\n\nDL.ITBTFL\n\nFunctional length of ITB/TFL muscle on dominant side\n\nDL.Hamstring\n\nFunctional length of hamstring muscles on dominant side\n\nNF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on nondominant side in degrees\n\nNF.Rectus\n\nRelative flexibility of rectus femoris muscle on nondominant side in degrees\n\nNF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on nondominant side in degrees\n\nNF.Hamstring\n\nRelative flexibility of hamstring muscles on nondominant side in degrees\n\nNL.Iliopsoas\n\nFunctional length of iliopsoas muscle on nondominant side\n\nNL.Rectus\n\nFunctional length of rectus femoris muscle on nondominant side\n\nNL.ITBTFL\n\nFunctional length of ITB/TFL muscle on nondominant side\n\nNL.Hamstring\n\nFunctional length of hamstring muscles on nondominant side\n", "download": "http://www.statsci.org/data/oz/backpain.txt", "filename": "backpain", "name": "Runners with Low Back Pain", "number_format": 31, 
"remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data come from the 1990 Pilot Surf/Health Study of NSW Water Board. The first column takes values 1 or 2 according to the recruit's perception of whether (s)he is a Frequent OCean Swimmer, the second column has values 1 or 4 according to recruit's usually chosen swimming location (1 for non-beach, 4 for beach), the third column has values 2 (aged 15-19), 3 (aged 20-25), or 4 (aged 25-29), the fourth column has values 1 (male) or 2 (female) and finally, the fifth column has the number of self-diagnosed ear infections that were reported by the recruit.", "download": "http://www.statsci.org/data/oz/earinf.txt", "filename": "earinf", "name": "Ear Infections in Swimmers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": " Alex Rodriguez (known to fans as A-Rod)was the youngest player ever to hit 500 home runs. The file holds the number of home runs hit by A-Rod during the 1994–2016 seasons. Describe the distribution, mentioning its shape and any unusual features. ", + "download": "https://dasl.datadescription.com/download/data/3038", + "filename": "a-rod-2016", + "name": "A-Rod 2016", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "In Olympic Archery both men and women start with a field of 64 qualifiers. 
Each archer shoots a round of 72 arrows (total possible score: 720) to establish a seeding position. Then they participate in a single-elimination contest. Thus, the seeding round is the only one that provides data for all archers (because some are […] ", + "download": "https://dasl.datadescription.com/download/data/3056", + "filename": "Archery", + "name": "Archery", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "American League baseball games are played under the designated hitter rule, meaning that pitchers, often weak hitters, do not come to bat. Baseball owners believe that the designated hitter rule means more runs scored, which in turn means higher attendance. Is there evidence that more fans attend games if the teams score more runs? The […] ", + "download": "https://dasl.datadescription.com/download/data/3057", + "filename": "Attendance-2016", + "name": "Attendance 2016", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "It has been suggested that children born in the summer have an advantage over their peers when it comes to sports, perhaps because they can be outdoors when they are young. The data report the number of professional ballplayers born in each month of the year for one season of professional baseball. 
", + "download": "https://dasl.datadescription.com/download/data/3060", + "filename": "Ballplayer-births", + "name": "Ballplayer births", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3063", + "filename": "Baseball-attendance", + "name": "Baseball attendance", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3064", + "filename": "Baseball-circumferences", + "name": "Baseball circumferences", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Ballplayers have been signing ever larger contracts. The highest salaries (in millions of dollars per season) for each year since 1874 are in the data file. 
", + "download": "https://dasl.datadescription.com/download/data/3065", + "filename": "Baseball-salaries-2015", + "name": "Baseball salaries 2015", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3066", + "filename": "Baseball-salaries-2016", + "name": "Baseball salaries 2016", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3067", + "filename": "Baseball-weights", + "name": "Baseball weights", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3069", + "filename": "Basketball-shots", + "name": "Basketball shots", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A company that makes basketballs has the motto: “Our basketballs are ready to play.” Therefore, it is important to the company that the basketballs are inflated with the proper amount of air when shipped. 
Most basketballs are inflated to 7 to 9 pounds per square inch. Recently the company selected a random basketball from its […] ", + "download": "https://dasl.datadescription.com/download/data/3068", + "filename": "Basketballs", + "name": "Basketballs", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The Belmont Stakes is the last and longest of the three horse races that make up the Triple Crown. Curiously, in some of the Belmont races horses have run clockwise around the track, and in others they have run counterclockwise. Do the horses care? But note that the length of the race has also not […] ", + "download": "https://dasl.datadescription.com/download/data/3072", + "filename": "Belmont-stakes-2015", + "name": "Belmont stakes 2015", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Sport" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "After purchasing a batch of flight helmets that did not fit the heads of many pilots, the NZ Airforce decided to measure the head sizes of all recruits. Before this was carried out, information was collected to determine the feasibility of using cheap cardboard callipers to make the measurements, instead of metal ones which were expensive and uncomfortable. The data lists the head diameters of 18 recruits measured once using cardboard callipers and again using metal callipers. One question is whether there is any systematic difference between the two sets of callipers. 
One might also ask whether there is more variability in the cardboard callipers measurement than that of the metal callipers. ", "download": "http://www.statsci.org/data/oz/nzhelmet.txt", "filename": "nzhelmet", "name": "Helmet Sizes for New Zealand Airforce", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "These data were collected as part of a project for the Federal Office for Road Safety conducted by the Research Institute of Gender and Health at the University of Newcastle. There is evidence that women drivers who are involved in motor vehicle accidents are more likely than men to be injured. A possible reason is that women often drive smaller cars that provide less protection in a collision. One of the aims of the project was to examine preferences for cars among men and women and investigate the extent to which safety was a factor in determining preferences. \nThe survey was conducted by research assistants who asked people in car parks to participate and administered a structured questionnaire. They were instructed to obtain data from men and women with small, medium and large cars, with 50 people per group for a total of 300 respondents. (The sample size was based on power requirements for another part of the survey that involved anthropometric measurements.) The research assistants approached people in car parks of the University of Newcastle and nearby shopping centres during December 1997 and January 1998. \nThe data consist of 300 records each with 22 variables. 
The variables are: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nID\n\nIdentification number of respondent\n\nAge\n\nAge of respondent (years)\n\nSex\n\n1=female, 2=male\n\nLicYr\n\nTime they have held a full driving licence, in years and months (years)\n\nLicMth\n\nTime they have held a full driving licence, in years and months (months)\n\nActCar\n\nMake, model and year of car most often driven, coded to size of car 1=small, 2=medium, 3=large\n\nKids5\n\nChildren under five, 1=yes, 2=no\n\nKids6\n\nChildren 6 to 16, 1=yes, 2=no\n\nPrefCar\n\nPreferred car, coded to size of car 1=small, 2=medium, 3=large\n\nCar15k\n\nPreferred type of car if cost $15000, 1=small new car; 2=large second-hand car\n\nReason\n\n1=safety, 2=reliability, 3=cost, 4=performance, 5=comfort, 6=looks\n\nCost\n\nHow important is cost when buying a car? 1=not important, 2=little importance, 3=important, 4=very important\n\nReliable\n\nHow important is reliability ...?\n\nPerform\n\nHow important is performance ...?\n\nFuel\n\nHow important is fuel consumption ...?\n\nSafety\n\nHow important is safety ...?\n\nAC/PS\n\nHow important is air conditioning/power steering ...?\n\nPark\n\nHow important is ease of parking ...?\n \nRoom\n \nHow important is space/roominess ...?\n \nDoors\n \nHow important is the number of doors ...?\n \nPrestige\n \nHow important is prestige/style ...?\n \nColour\n \nHow important is colour ...?\n", "download": "http://www.statsci.org/data/oz/carprefs.txt", "filename": "carprefs", "name": "Car Preferences", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Do you use up the same amount of the soap in the shower each morning, or does it depend on the size of the bar of soap? 
This data was collected by Rex Boggs of Glenmore State High School in Rockhampton, Queensland. Rex writes: \nI had a hypothesis that the daily weight of my bar of soap in my shower wasn't a linear function, the reason being that the tiny little bar of soap at the end of its life seemed to hang around for just about ever. I wanted to throw it out, but I felt I shouldn't do so until it became unusable. And that seemed to take weeks. \nAlso I had recently bought some digital kitchen scales and felt I needed to use them to justify the cost. I hypothesised that the daily weight of a bar of soap might be dependent upon surface area, and hence would be a quadratic function. \nI kept records for three weeks (the life of the bar), and was amazed to find that the data was linear with a very high R^2 value, until the last few days of its life. \nThe data ends at day 22. On day 23 the soap broke into two pieces and one piece went down the plughole ... \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDate \n\nDate of observation\n\nDay \n\nNumber of days since beginning of experiment\n\nWeight \n\nWeight of soap bar (grams)\n\n\n\n", "download": "http://www.statsci.org/data/oz/soap.txt", "filename": "soap", "name": "Bar of Soap", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "These data are for specimens of 50 varieties of timber, for modulus of rigidity, modulus of elasticity and air dried density, arranged in increasing order of magnitude of the density. 
", "download": "http://www.statsci.org/data/oz/timber.txt", "filename": "timber", "name": "Timber Data", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A soft drink bottler is analyzing vending machine service routes in his distribution system. He is interested in predicting the amount of time required by the route driver to service the vending machines in an outlet. This service activity includes stocking the machine with beverage products and minor maintenance or housekeeping. The industrial engineer responsible for the study has suggested that the two most important variables affecting the delivery time are the number of cases of product stocked and the distance walked by the route driver. The engineer has collected 25 observations on delivery time (minutes), number of cases and distance walked (feet).", "download": "http://www.statsci.org/data/general/softdrin.txt", "filename": "softdrin", "name": "Soft Drink Delivery Times", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Measurements of weight and tar, nicotine, and carbon monoxide content\nare given for 25 brands of domestic cigarettes.\n\nThe Federal Trade Commission annually rates varieties of domestic\ncigarettes according to their tar, nicotine, and carbon monoxide\ncontent. The United States Surgeon General considers each of these\nsubstances hazardous to a smoker's health. 
Past studies have shown\nthat increases in the tar and nicotine content of a cigarette are\naccompanied by an increase in the carbon monoxide emitted from the\ncigarette smoke.\n\nThe data presented here are taken from Mendenhall and Sincich (1992)\nand are a subset of the data produced by the Federal Trade Commission.\n\nFor more information, see the article \"Using Cigarette Data for an\nIntroduction to Multiple Regression\" by Lauren McIntyre in Volume 2,\nNumber 1, of the _Journal of Statistics Education_. ", "download": "http://jse.amstat.org/datasets/cigarettes.dat.txt", "filename": "cigarettes", "name": "Cigarette data for an introduction to multiple regression", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": false }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Impact strength of insulation cuts in foot-pounds. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nLot\n\nLot of insulating material\n\nCut\n\nLengthwise (Length) or crosswise (Cross)\n\nStrength\n\nImpact strength in foot-pounds\n\n\n\n", "download": "http://www.statsci.org/data/general/insulate.txt", "filename": "insulate", "name": "Impact Strength Of Insulation Cuts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data was collected by Stewart Fischer and David Tippetts, statistics students at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. Here is their description of the data and its collection: \nThe experiment decided upon was to see if by using two different designs of paper aeroplane, how far the plane would travel. 
In considering this, the question arose, whether different types of paper and different angles of release would have any effect on the distance travelled. Knowing that paper aeroplanes are greatly influenced by wind, we had to find a way to eliminate this factor. We decided to perform the experiment in a hallway of the University, where the effects of wind can be controlled to some extent by closing doors. \nIn order to make the experimental units as homogeneous as possible we allocated one person to a task, so person 1 folded and threw all planes, person 2 calculated the random order assignment, measured all the distances, checked that the angles of flight were right, and checked that the plane release was the same each time. \nThe factors that we considered each had two levels as follows: \nPaper: A4 size, 80gms and 50gms\nDesign: High Performance Dual Glider, and Incredibly Simple Glider (patterns attached to original report)\nAngle of release: Horizontal, or 45 degrees upward. \nThe random order assignment was calculated using the random number function of a calculator. Each combination of factors was assigned a number from one to eight, the random numbers were generated and accordingly the order of the experiment was found. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in mm\n\nPaper\n\n80gms = 1, 50gms = 2\n\nAngle\n\nHorizontal = 1, 45 degrees = 2\n\nDesign\n\nHigh-performance = 1, Incredibly simple = 2\n\nOrder\n\nOrder in which the runs were conducted\n", "download": "http://www.statsci.org/data/oz/planes.txt", "filename": "planes", "name": "Paper Plane Experiment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "\"Discovery Day\" is a day set aside by the United States Naval Postgraduate School in Monterey, California, to invite the general public into its laboratories. On Discovery Day, 21 October 1995, data on reaction time and hand-eye coordination were collected on 118 members of the public who visited the Human Systems Integration Laboratory. The age and sex of each subject were also recorded. Visitors were mostly in family groups. \nOne experiment which demonstrates motor learning and hand-eye coordination, is rotary pursuit tracking. The equipment used has a rotating disk with a 3/4\" target spot. The subject’s task is to maintain contact with the target spot with a metal wand. Trials were conducted for 15 seconds at a time, and the total contact time during the 15 seconds was recorded. Four trials were recorded for each of 108 subjects. \nThe target spot on the Circle tracker keeps constant speed in a circular path. The target spot on the Box tracker has varying speeds as it traverses the box, making the task potentially more difficult. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSex\n\nMale (M) or female (F)\n\nAge\n\nAge of subject in years\n\nShape\n\nBox or Circle\n\nTrial1\n\nContact time for 1st trial\n\nTrial2\n\nContact time for 2nd trial\n\nTrial3\n\nContact time for 3rd trial\n\nTrial4\n\nContact time for 4th trial\n\n\n\n", "download": "http://www.statsci.org/data/general/tracking.txt", "filename": "tracking", "name": "Rotary Pursuit Tracking", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Experiment conducted by Bill Afantenou, second year statistics student at QUT. Here is his description of the experiment: \n``As I am a big pizza lover, I had much pleasure in involving pizza in my experiment. I became curious to find out the time it took for a pizza to be delivered to the front door of my house. I was interested to see how, by varying whether I ordered thick or thin crust, whether Coke was ordered with the pizza and whether garlic bread was ordered with the pizza, the response would be affected. \n``Because of my current financial status and limitation of time, I decided to have only two replicates, just to get a reasonable estimate of the variance. To decrease my financial burden I managed a deal with the manager of the pizza shop. I managed to get the pickup special, delivered to my house, which was the cheapest and smallest pizza made. I tried to repeat the experiment in as nearly as possible identical conditions to reduce `noise'. \n``I ordered the pizza from the same shop, being Domino's Pizza. To be consistent I ordered a Supreme pizza each time at approximately the same time of day. The response was measured from the time I closed the telephone to the time the pizza was delivered to the front door of my house. 
\n``I wrote each of the eight treatments on a piece of paper twice, put them all into a hat, mixed them up, and took them out one at a time to allocate the order in which each treatment was done. \n``As well as the response and treatment for each pizza delivery the actual hour of delivery was recorded, also the order in which the treatments were done and whether the driver was male or female.'' \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nCrust\n\nThin=0, Thick=1\n\nCoke\n\nNo=0, Yes=1\n\nBread\n\nGarlic bread. No=0, Yes=1\n\nDriver\n\nMale=M, Female=F\n\nHour\n\nTime of order in hours since midnight\n\nDelivery\n\nDelivery time in minutes\n", - "download": "http://www.statsci.org/data/oz/pizza.txt", - "filename": "pizza", - "name": "Pizza Delivery Experiment", + "description": "Experiment conducted by Bill Afantenou, second year statistics student at QUT. Here is his description of the experiment: \n``As I am a big pizza lover, I had much pleasure in involving pizza in my experiment. I became curious to find out the time it took for a pizza to be delivered to the front door of my house. I was interested to see how, by varying whether I ordered thick or thin crust, whether Coke was ordered with the pizza and whether garlic bread was ordered with the pizza, the response would be affected. \n``Because of my current financial status and limitation of time, I decided to have only two replicates, just to get a reasonable estimate of the variance. To decrease my financial burden I managed a deal with the manager of the pizza shop. I managed to get the pickup special, delivered to my house, which was the cheapest and smallest pizza made. I tried to repeat the experiment in as nearly as possible identical conditions to reduce `noise'. \n``I ordered the pizza from the same shop, being Domino's Pizza. To be consistent I ordered a Supreme pizza each time at approximately the same time of day. 
The response was measured from the time I closed the telephone to the time the pizza was delivered to the front door of my house. \n``I wrote each of the eight treatments on a piece of paper twice, put them all into a hat, mixed them up, and took them out one at a time to allocate the order in which each treatment was done. \n``As well as the response and treatment for each pizza delivery the actual hour of delivery was recorded, also the order in which the treatments were done and whether the driver was male or female.'' \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nCrust\n\nThin=0, Thick=1\n\nCoke\n\nNo=0, Yes=1\n\nBread\n\nGarlic bread. No=0, Yes=1\n\nDriver\n\nMale=M, Female=F\n\nHour\n\nTime of order in hours since midnight\n\nDelivery\n\nDelivery time in minutes\n", + "download": "http://www.statsci.org/data/oz/pizza.txt", + "filename": "pizza", + "name": "Pizza Delivery Experiment", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "An experiment is conducted to compare the energy requirements of three physical activities: running, walking and bicycle riding. Eight subjects are asked to run, walk and bicycle a measured distance, and the number of kilocalories expended per kilometre is determined for each subject during each activity. The activities are run in random order with time for recovery between activities. Each activity was monitored exactly once for each individual. 
", + "download": "http://www.statsci.org/data/general/energy.txt", + "filename": "energy", + "name": "Energy Requirements Running, Walking and Cycling", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data set comprises the results of a saturated 2^(15-11) fractional factorial with 4 observations per run. There were 15 controllable factors. The responses are the proportional shrinkage of four samples taken from 3000-foot lengths of speedometer cable manufactured at each set of conditions. The objective was to reduce the post-extrusion shrinkage of the speedometer casing. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nA\n\nline OD\n\nB\n\nliner die\n\nC\n\nliner material\n\nD\n\nliner line speed\n\nE\n\nwire braid type\n\nF\n\nbraiding tension\n\nG\n\nwire diameter\n\nH\n\nliner tension\n\nI\n\nliner temperature\n\nJ\n\ncoating material\n\nK\n\ncoating die type\n\nL\n\nmelt temperature\n\nM\n\nscreen pack\n\nN\n\ncooling method\n\nO\n\nline speed\n\ny1\n\nshrinkage value of first sample\n\ny2\n\nshrinkage value of second sample\n\ny3\n\nshrinkage value of third sample\n\ny4\n\nshrinkage value of fourth sample\n", + "download": "http://www.statsci.org/data/general/speedome.txt", + "filename": "speedome", + "name": "Speedometer-Cable Shrinkage", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data are from a Procter and Gamble study reported by Smith and Dubey (1964) on the amount of available chlorine in a product as a function of time since manufacture. 
Theoretical considerations lead to the model \nChlorine = a + (0.49 - a) exp{ -b (Weeks - 8) } \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nWeeks\n\nTime in weeks since manufacture\n\nChlorine\n\nAvailable chlorine\n", + "download": "http://www.statsci.org/data/general/chlorine.txt", + "filename": "chlorine", + "name": "Available Chlorine", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data give the normalized magnitudes of the voice data when the vowel 'ooh' was sung at a pitch of 290 Hz. A Kurzweil K2500 Sampler/Synthesizer was used to capture and to store the data. \nThe frequencies found in the signal can be used to identify the phonetical vowel, and are of interest in voice synthesis, therapy and training. Further details are given in Oliver (1997). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMagnitude\n\nNormalized Magnitudes at equi-spaced time intervals\n\n\n\n", + "download": "http://www.statsci.org/data/general/ooh.txt", + "filename": "ooh", + "name": "Voice Data from Singing the Vowel 'ooh'", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "In studies aimed at characterising an author's style, samples of n words are taken and the number of function words in each sample counted. Often binomial or Poisson distributions are assumed to hold for the proportions of function words. 
The table shows the combined frequencies (x) of the articles \"the\", \"a\" and \"an\" in samples from Macaulay's \"Essay on Milton\", taken from the Oxford edition of Macaulay's (1923) literary essays. Non-overlapping samples were drawn from opening words of two randomly chosen lines from each of 50 pages of printed text, 10 word samples being simply extensions of 5 word samples. The data show clear evidence of underdispersion.", + "download": "http://www.statsci.org/data/oz/wdcount.txt", + "filename": "wdcount", + "name": "Underdispersed Word Counts", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Progressive Insurance asked customers who had been involved in auto accidents how far they were from home when the accident happened. ", + "download": "https://dasl.datadescription.com/download/data/3039", + "filename": "accidents", + "name": "Accidents", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "At a barbershop music singing competition, choruses are judged on three scales: Music (quality of the arrangement, etc.), Performance, and Singing. 
The scales are supposed to be independent of each other, and each is scored by a different judge, but a friend claims that he can predict a chorus’s singing score from the other two […] ", + "download": "https://dasl.datadescription.com/download/data/3061", + "filename": "Barbershop-music", + "name": "Barbershop music", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "An experiment is conducted to compare the energy requirements of three physical activities: running, walking and bicycle riding. Eight subjects are asked to run, walk and bicycle a measured distance, and the number of kilocalories expended per kilometre is determined for each subject during each activity. The activities are run in random order with time for recovery between activities. Each activity was monitored exactly once for each individual. ", - "download": "http://www.statsci.org/data/general/energy.txt", - "filename": "energy", - "name": "Energy Requirements Running, Walking and Cycling", + "description": "In 2016 13.27 million people attended a Broadway show, paying an average of more than $100 per ticket. The Broadway League, Inc. (https://www.broadwayleague.com/research/statistics-broadway-nyc/) provides some historical and current data. These variables are available for each year since the 1984-85 season: Season (The initial year of the season, so the 1984-85 season is 1984.) 
Gross ($M) […] ", + "download": "https://dasl.datadescription.com/download/data/3087", + "filename": "Broadway-shows", + "name": "Broadway shows", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data set comprises the results of a saturated 215-11 fractional factorial with 4 observations per run. There were 15 controllable factors. The responses are the proportional shrinkage of four samples taken from 3000-foot lengths of speedometer cable manufactured at each set of conditions. The objective was to reduce the post-extrusion shrinkage of the speedometer casing. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nA\n\nline OD\n\nB\n\nliner die\n\nC\n\nliner material\n\nD\n\nliner line speed\n\nE\n\nwire braid type\n\nF\n\nbraiding tension\n\nG\n\nwire diameter\n\nH\n\nliner tension\n\nI\n\nliner temperature\n\nJ\n\ncosting material\n\nK\n\ncoating die type\n\nL\n\nmelt temperature\n\nM\n\nscreen pack\n\nN\n\ncooling method\n\nO\n\nline speed\n\ny1\n\nshrinkage value of first sample\n\ny2\n\nshrinkage value of second sample\n\ny3\n\nshrinkage value of third sample\n\ny4\n\nshrinkage value of fourth sample\n", - "download": "http://www.statsci.org/data/general/speedome.txt", - "filename": "speedome", - "name": "Speedometer-Cable Shrinkage", + "description": "Fast food is often considered unhealthy because much of it is high in both fat and sodium. But are the two related? The data give the fat and sodium contents of several brands of burgers. 
", + "download": "https://dasl.datadescription.com/download/data/3088", + "filename": "Burgers", + "name": "Burgers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data are from a Proctor and Gamble study reported by Smith and Dubey (1964) on the amount of available chlorine in a product as a function of time since manufacture. Theoretical considerations lead to the model \nChlorine = a + (0.49 - a) exp{ -b (Weeks - 8) } \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nWeeks\n\nTime in weeks since manufacture\n\nChlorine\n\nAvailable chlorine\n", - "download": "http://www.statsci.org/data/general/chlorine.txt", - "filename": "chlorine", - "name": "Available Chlorine", + "description": "The dataset holds facts about candy bars read from their nutrition labels. The data are a good example for multiple regression (e.g. what contributes to the calories of a candy bar?). For such an analysis, the indicator variable for nuts appears to work well. Note that 5 sugar-free candy bars are marked as NA in […] ", + "download": "https://dasl.datadescription.com/download/data/3092", + "filename": "Candy-bars", + "name": "Candy bars", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the normalized magnitudes of the voice data when the vowel 'ooh' was sung at a pitch of 290 Hz. A Kurzweil K2500 Sampler/Synthesizer was used to capture and to store the data. \nThe frequencies found in the signal can be used to identify the phonetical vowel, and are of interest in voice synthesis, therapy and training. 
Further details are given in Oliver (1997). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMagnitude\n\nNormalized Magnitudes at equi-spaced time intervals\n\n\n\n", - "download": "http://www.statsci.org/data/general/ooh.txt", - "filename": "ooh", - "name": "Voice Data from Singing the Vowel 'ooh'", + "description": "In 1998, as an advertising campaign, the Nabisco Company announced a “1000 Chips Challenge,” claiming that every 18-ounce bag of their Chips Ahoy! cookies contained at least 1000 chocolate chips. Dedicated statistics students at the Air Force Academy randomly selected bags of cookies and counted the chocolate chips. The data report their counts. ", + "download": "https://dasl.datadescription.com/download/data/3110", + "filename": "Chips-Ahoy", + "name": "Chips Ahoy!", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In studies aimed at characterising an author's style, samples of n words are taken and the number of function words in each sample counted. Often binomial or Poisson distributions are assumed to hold for the proportions of function words. The table shows the combined frequencies (x) of the articles \"the\", \"a\" and \"an\" in samples from Macauley's \"Essay on Milton\", taken from the Oxford edition of Macualey's (1923) literary essays. Non-overlapping samples were drawn from opening words of two randomly chosen lines from each of 50 pages of printed text, 10 word samples being simply extensions of 5 word samples. The data show clear evidence of underdispersion.", - "download": "http://www.statsci.org/data/oz/wdcount.txt", - "filename": "wdcount", - "name": "Underdispersed Word Counts", + "description": "The website rcdb.com, the Roller Coaster Database, holds facts about every roller coaster in the world, current or past. 
(If you know of one that is missing, please let the site master know.) These data are for recently opened coasters, most of which are still in operation.", + "download": "https://dasl.datadescription.com/download/data/3118", + "filename": "Coasters-2015", + "name": "Coasters 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Other" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the number of deaths in prison custody in Australia in each of the six years 1990 to 1995, given separately for Aboriginal and Torres Strait Islanders (indigenous) and others (non-indigenous). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1990 through 1995\n\nIndigenous\n\nYes = Aboriginal or Torres Strait Islander, No = Non-indigenous\n\nPrisoners\n\nTotal number in prison custody\n\nDeaths\n\nNumber of deaths in prison custody\n\nPopulation\n\nAdult population (15+ years)\n\n\n\n\nThe data were collected in response to the Royal Commission into Aboriginal Deaths in Custody, the final report of which was tabled in the Federal Parliament on the 9 May 1991. \nThe report of the Royal Commission has two streams. One is concerned with the ninety-nine Aboriginal and Torres Strait Islander deaths in custody which occurred throughout Australia during the period 1 January 1980 to 31 May 1989. Issues around the causes of death, culpability of custodians and their employers, and the prevention of future deaths were addressed in depth. The second stream concerned what the Royal Commission called the ‘underlying issues’: the social, cultural, and legal factors which, in the view of the Commissioners, had some bearing on the deaths. 
These underlying issues, as revealed from the chapter headings of the Royal Commission’s National Report, included the Legacy of History, Aboriginal Society Today, Relations With the Non-Aboriginal Community, The Harmful Use of Alcohol and Other Drugs, Schooling, Employment, Unemployment and Poverty, Housing and Infrastructure, Land Needs, and Self-determination. \nThe link between the Royal Commission’s discussion of the individual deaths investigated, the prevention of future deaths and the underlying issues, is its position on the over-representation of Indigenous people in custody in Australia. A central conclusion of the Royal Commission, illustrating this point, was as follows: \nThe work of the commission has established that Aboriginal people in custody do not die at a greater rate than non-Aboriginal people in custody. \nHowever, what is overwhelming different is the rate at which Aboriginal people come into custody, compared with the rate of the general community ... The ninety-nine who died in custody illustrate that over-representation and, in a sense, are the victims of it. \nThe conclusions are clear. Aboriginal people die in custody at a rate relevant to their proportion of the whole population which is totally unacceptable and which would not be tolerated if it occurred in the non-Aboriginal community. But this occurs not because Aboriginal people in custody are more likely to die than others in custody, but because the Aboriginal population is grossly over-represented in custody. 
Too many Aboriginal people are in custody too often (Johnston, 1991, Vol 1, p6).", "download": "http://www.statsci.org/data/oz/custody.txt", "filename": "custody", "name": "Aboriginal Deaths in Custody", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Facts on the countries of Asia. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCountry \n\nName\n\nArea \n\nTotal area (sq km)\n\nPopulation \n\nPopulation July 1995 est.\n\nLife \n\nLife Expectancy 1995 est. (years)\n\nGDP \n\nGDP 1994 (US$ billions)\n\nGDP/caput \n\nGDP per person 1994 est (US$)\n\n\n\n", "download": "http://www.statsci.org/data/oz/asia.txt", "filename": "asia", "name": "Countries of Asia", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The United States Census Bureau keeps track of the number of adoptions in each State (and Washington D.C.). The data includes the population of each state as well. How should adoptions be summarized and displayed? ", + "download": "https://dasl.datadescription.com/download/data/3043", + "filename": "Adoptions", + "name": "Adoptions", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Administration" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the survival status of passengers on the Titanic, together with their names, age, sex and passenger class. 
\nAbout half of the ages for the 3rd Class passengers are missing, although a good many of these could be filled in from the original source below. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName\n\nRecorded name of passenger\n\nPClass\n\nPassenger class: 1st, 2nd or 3rd\n\nAge\n\nAge in years\n\nSex\n\nmale or female\n\nSurvived\n\n1 = Yes, 0 = No\n\n\n\n", "download": "http://www.statsci.org/data/general/titanic.txt", "filename": "titanic_", "name": "Passengers on the Titanic", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "For each of ten streets with bike lanes, investigators measured the distance between the centre line and a cylist in the bike lane. They used photography to determine the distance between the cyclist and a passing car on those same ten streets, recording all distances in feet. \n", "download": "http://www.statsci.org/data/general/cyclist.txt", "filename": "cyclist", "name": "Distance of Cars from Cyclists", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Hourly carbon monoxide (CO) averages were recorded on summer weekdays at a measurement station in Los Angeles. The station was established by the Environmental Protection Agency as part of a larger study to assess the effectiveness of the catalytic converter. It was located about 25 feet from the San Diego Freeway, which in this particular area is located at 145 degrees north. 
It was located such that winds from 145 to 325 degress (which in the summer are the prevalent wind directions during the daylight hours) transport the CO emissions from the highway toward the measurement station. Aggregate measurements were recored for each hour of the day 1 to 24. \nHour \n- \nhour of the day, from midnight to midnight \nCO \n- \naverage summer weekday CO concentration (parts per million) \nTD \n- \naverage weekday traffic density (traffic count/traffic speed) \nWS \n- \naverage perpendicular wind-speed component,\nwind speed x cos(wind direction - 235 degrees) \n\nIt would be interesting to have wind speed and direction recorded separately. ", "download": "http://www.statsci.org/data/general/cofreewy.txt", "filename": "cofreewy", "name": "Carbon Monoxide from a Freeway", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "This is a highly fractionated two-level factorial design employed as a screening design in an off-line welding experiment performed by the National Railway Corporation of Japan. There were 16 runs and 9 experimental factors. The response variable is the observed tensile strength of the weld, one of several quality characteristics measured. All other variables are at plus and minus levels. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nRods\n\nKind of welding rods\n\nDrying\n\nPeriod of drying\n\nMaterial\n\nWelded material\n\nThickness\n\nThickness\n\nAngle\n\nAngle\n\nOpening\n\nOpening\n\nCurrent\n\nCurrent\n\nMethod\n\nWelding method\n\nPreheating\n\nPreheating\n\nStrength\n\nTensile strength of the weld in kg/mm\n", "download": "http://www.statsci.org/data/general/welding.txt", "filename": "welding", "name": "Tensile Strength of Welds", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Military pilots sometimes black out when their brains are deprived of oxygen due to G-forces during violent maneuvers. Glaister and Miller (1990) produced similar symptoms by exposing volunteers’ lower bodies to negative air pressure, likewise decreasing oxygen to the brain. The data lists the subjects' ages and whether they showed syncopal blackout related signs (pallor, sweating, slow heartbeat, unconsciousness) during an 18 minute period. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nInitials of the subject's name\n\nAge\n\nSubject's age in years\n\nSigns\n\nWhether subject showed blackout-related signs (0=No, 1=Yes)\n", "download": "http://www.statsci.org/data/general/gforces.txt", "filename": "gforces", "name": "G-Induced Loss of Consciousness", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Commercial airlines overbook flights, selling more tickets than they have seats, because a sizeable number of reservation holders don’t show up in time for their flights. 
But sometimes, there are more passengers wishing to board than there are seats. Most airlines try to entice travelers to voluntarily give up their seats in return for free […] ", + "download": "https://dasl.datadescription.com/download/data/3048", + "filename": "Airline-bumping", + "name": "Airline bumping 2017", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "https://www.albany.edu/sourcebook/csv/t3177.csv adapted from: U.S. Department of Transportation, Federal Aviation Administration, Semiannual Report to Congress on the Effectiveness of the Civil Aviation Security Program, July 1 to \"December 31, 1978, Exhibit 10; July 1 to December 31, 1982, Exhibit 10; July 1 to December 31, 1984, Exhibit 7; July 1 to December 31, 1989, p. 11 (Washington, DC: U.S. Department of Transportation); U.S. Department of Transportation, Federal Aviation Administration, Annual Report to Congress on Civil Aviation Security, January 1, 1993-December 31, 1993, p. 9; January 1, 1995-December 31, 1995, p. 11 (Washington, DC: U.S. Department of Transportation); and data provided by the U.S. Department of Transportation, Federal Aviation Administration and Bureau of Transportation Statistics [Online]. Available: http://www.bts.gov/publications/national_transportation_statistics/ 2003/html/table_02_16.html [May 24, 2004]. 
Table adapted by SOURCEBOOK staff.", + "download": "https://dasl.datadescription.com/download/data/3049", + "filename": "Airport-screening", + "name": "Airport screening", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The Bicycle Helmet Safety Institute website includes a report on the number of bicycle fatalities per year in the United States. The data gives the counts for the years 1994–2015. ", + "download": "https://dasl.datadescription.com/download/data/3073", + "filename": "Bike-safety-2015", + "name": "Bike safety 2015", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The dataset is the number of camp sites at each of the public parks in Vermont ", + "download": "https://dasl.datadescription.com/download/data/3091", + "filename": "Camp-sites", + "name": "Camp sites", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Travel" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the number of deaths cuased by firearms in Australia from 1983 to 1997, expressed as a rate per 100,000 of population. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\nYear\n\nRate \n\nNumber of deaths caused by firearms per 100,000 population\n", "download": "http://www.statsci.org/data/oz/firearms.txt", "filename": "firearms", "name": "Deaths Caused by Firearms", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Criminologists are interested in the effect of punishment regimes on crime rates. This has been studied using aggregate data on 47 states of the USA for 1960. The data set contains the following columns: \n \nVariable\n \nDescription\n\nM\n\npercentage of males aged 14–24 in total state population\n\nSo\n\nindicator variable for a southern state \n\nEd\n\nmean years of schooling of the population aged 25 years or over\n\nPo1\n\nper capita expenditure on police protection in 1960 \n\nPo2\n\nper capita expenditure on police protection in 1959 \n\nLF\n\nlabour force participation rate of civilian urban males in the age-group 14-24\n\nM.F\n\nnumber of males per 100 females \n\nPop\n\nstate population in 1960 in hundred thousands\n\nNW\n\npercentage of nonwhites in the population \n\nU1\n\nunemployment rate of urban males 14–24 \n\nU2\n\nunemployment rate of urban males 35–39 \n\nWealth\n\nwealth: median value of transferable assets or family income\n\nIneq\n\nincome inequality: percentage of families earning below half the median income\n\nProb\n\nprobability of imprisonment: ratio of number of commitments to number of offenses\n\nTime\n\naverage time in months served by offenders in state prisons before their first release\n\nCrime\n\ncrime rate: number of offenses per 100,000 population in 1960\n", "download": "http://www.statsci.org/data/general/uscrime.txt", "filename": "uscrime", "name": "Effect of Punishment Regimes on Crime Rates", "number_format": 
31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A number of homicide incidents in Australia have involved multiple killings. A multiple killing is defined as any incident where two or more persons are murdered. According to available literature, there have been 24 multiple killings by firearm between 1987 and 1996. These resulted in 128 deaths. The data give the number of multiple killings which have been recorded for the period 1987 to 28 April 1996. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\n1987 - 1996.\n\nIncidents \n\nNumber of multiple killings\n\nDeaths \n\nTotal number of deaths\n\n\n\n\nThe data for the year 1996 include killings only up to and including 28 April.", "download": "http://www.statsci.org/data/oz/multkill.txt", "filename": "multkill", "name": "Multiple Killings Committed with a Firearm", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Crime" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "An individual's critical flicker frequency is the highest frequency at which the flicker in a flickering light source can be detected. At frequencies above the critical frequency, the light source appears to be continuous even though it is actually flickering. This investigation recorded critical flicker frequency and iris colour of the eye for 19 subjects. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nColour\n\nEye colour: Brown, Green or Blue\n\nFlicker\n\nCritical flicker frequency in cycles/sec\n", "download": "http://www.statsci.org/data/general/flicker.txt", "filename": "flicker", "name": "Eye Colour and Flicker Frequency", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data are a random sample from the data in Population commute times.", + "download": "https://dasl.datadescription.com/download/data/3123", + "filename": "Commute-times-sample100", + "name": "Commute times sample100", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Population" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data were collected as part of a time study for Telecom, now known as Telstra. The purpose if the study was to model the total hours worked in a section of Telecom in terms of the counts of various tasks. It was hoped that such a model could be used to predict hours worked and hence staffing requirements in changing circumstances. The number of hours worked by employees in a fault reporting centre were recorded, together with the number of faults of each type which were recorded. \nEmployees often work on a flexitime system which allows them to build up time and to leave early every second Friday. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nNumber of hours worked\n\nByDa\n\nNumber of talks of a certain type\n\nPR\n\n\n\nRWT\n\nA type of fault variable \n\nFault\n\n\n\nSOA\n\nNumber of service orders of type A \n\nSOB\n\nNumber of service orders of type B \n\nSOC\n\nNumber of service orders of type C \n\nCable\n\n\n\nField\n\nField call \n\nHot\n\nHotline \n\nREST\n\n\n\nSpec\n\n\n\nApp\n\n\n\nProb\n\n\n\nSC\n\n\n\nHO\n\n\n\nMO\n\n\n\nDay\n\nDay of the week: 1-Monday, 2-Tuesday, 3-Wednesday, 4-Thursday, 5-Friday \n", "download": "http://www.statsci.org/data/oz/telecom.txt", "filename": "telecom", "name": "Telecom Work Measurement Study", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "An industrial Taguchi experiment was performed to study the influence of several controllable factors on the mean value and the variation in the percentage of shrinkage of products made by injection moulding. For studying the variation, three noise factors were also included in the design. All factors were set at two levels. \nThe problem is a `nominal-is-best' problem where the aim is to reach a certain tartet for the percentage shrinkage, at the same time having as small as variation as possible about the target value. The design that was applied is a so-called Taguchi L8(27)-design with seven controllable factors. At each setting of the controllable factors, the noise factors were varied according to a Taguchi L4(23)-design. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nControllable Factors:\n\nCycle\n\nCycle time\n\nMould\n\nMould temperature\n\nCavity\n\nCavity thickness\n\nPressure\n\nHolding pressure\n\nSpeed\n\nInjection speed\n\nTime\n\nHolding time\n\nGate\n\nGate size\n\nNoise Factors:\n\nRegrind\n\nPercentage regrind\n\nMoisture\n\nMoisture content\n\nTemperature\n\nAmbient temperature\n\nResponse:\n\nShrinkage\n\nPercentage shrinkage\n", "download": "http://www.statsci.org/data/general/injmould.txt", "filename": "injmould", "name": "Injection Moulding Shrinkage", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the ambient temperature and the number of primary O-rings damaged for 23 of the 24 space shuttle launches before the launch of the space shuttle Challenger on January 20, 1986. (Challenger was the 25th shuttle. One engine was lost at sea and could not be examined.) Each space shuttle contains 6 primary O-rings. \nThe forecast temperate of the launching day of the Challenger was 31 degrees F. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTemp\n\nAmbient temperature\n\nDamaged\n\nNumber of O-rings damaged\n", "download": "http://www.statsci.org/data/general/challenger.txt", "filename": "challenger", "name": "Space Shuttle Challenger", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data consist of failures of a piece of electronic equipment operating in two modes. For each operating period, Mode1 is the time spent operating in one mode and Mode2 is the time spent operating in the other. 
The total number of failures recorded in each period is recorded. \n \n\n\n \nVariable \n \nDescription\n \n\n\n \nMode1 \n \nTime in operating mode 1\n \nMode2 \n \nTime in operating mode 2\n \nFailures\n \nNumber of failures\n \n\n\n", "download": "http://www.statsci.org/data/general/twomodes.txt", "filename": "twomodes", "name": "Failures of Electronic Equipment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Sanford Weisberg writes \nWhen gasoline is pumped into a tank, hydrocarbon vapors are forced out of a tank and into the atmosphere. To reduce this significant source of air pollution, devices are installed to capture the vapor. In testing these vapor recovery systems, the amount that escapes cannot be measured, but a \"sniffer\" can determine if some vapor is escaping. Also, the amount that is recovered can be measured. To estimate the efficiency of the system, some method of estimating the total amount given off must be used. To this end, a laboratory experiment was conducted in which the amount of vapor given off was measured under carefully controlled conditions. Four variables are relevant for modeling. In an experiment, these conditions were varied and the quantity of emitted hydrocarbons was measured in grams. 
\n\n\n \n \nVariable\n \nDescription\n\n\n\n\n \n \nTankTemp\n - \ninitial tank temperature (°F)\n\n\nGasTemp\n - \ntemperature of the dispensed gasoline (°F)\n\n\nTankPres\n - \ninitial vapor pressure in the tank (psi)\n\n\nGasPres\n - \nvapor pressure of the dispensed gasoline (psi)\n\n\nHC\n - \nemitted hydrocarbons (g)\n\n\n", "download": "http://www.statsci.org/data/general/gasvapor.txt", "filename": "gasvapor", "name": "Sniffing for Hydrocarbon Vapour", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "https://en.wikipedia.org/wiki/List_of_U.S._states_by_electricity_production_from_renewable_sources", + "download": "https://dasl.datadescription.com/download/data/3051", + "filename": "Alternative-energy", + "name": "Alternative energy 2016", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "In a statement to a Senate Public Works Committee, a senior executive of Texaco, Inc., cited a study on the effectiveness of auto filters on reducing noise. Because of concerns about performance, two types of filters were studied, a standard silencer and a new device developed by the Associated Octel Company. Noise is in decibels/10. 
[…] ", + "download": "https://dasl.datadescription.com/download/data/3058", + "filename": "Auto-noise-filters", + "name": "Auto noise filters", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A student experiment was run to test the performance of 4 brands of batteries under 2 different Environments (room temperature and cold). For each of the 8 treatments, 2 batteries of a particular brand were put into a flashlight. The flashlight was then turned on and allowed to run until the light went out. The […] ", + "download": "https://dasl.datadescription.com/download/data/3070", + "filename": "Batteries", + "name": "Batteries", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Stopping distances in feet for a car tested 3 times at each of 5 speeds. We hope to create a model that predicts Stopping Distance from the Speed of the car. ", + "download": "https://dasl.datadescription.com/download/data/3086", + "filename": "Brakes", + "name": "Brakes", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Measurements on 38 1978-79 model automobiles. Gas mileage in miles per gallon as measured by Consumers’ Union on a test track. Other values as reported by automobile manufacturer. Used to illustrate regression model building and diagnosis. 
Be sure to check the residuals when predicting MPG. ", + "download": "https://dasl.datadescription.com/download/data/3096", + "filename": "Cars", + "name": "Cars", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A start-up company has developed an improved electronic chip for use in laboratory equipment. The company needs to project the manufacturing cost, so it develops a spreadsheet model that takes into account the purchase of production equipment, overhead, raw materials, depreciation, maintenance, and other business costs. The spreadsheet estimates the cost of producing 10,000 to […] ", + "download": "https://dasl.datadescription.com/download/data/3109", + "filename": "Chips", + "name": "Chips", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3126", + "filename": "Computer-chip", + "name": "Computer chip manufacturing", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Technology" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The Pew Research Center conducted a representative telephone survey in October of 2016. Among the reported results was the following table concerning the preferred political party affiliation of respondents and their ages for white voters. 
Is there evidence of age-based differences in party affiliation in the United States for white voters? ", + "download": "https://dasl.datadescription.com/download/data/3045", + "filename": "Age-and-party", + "name": "Age and party 2016", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Politics" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "1998 Baby data from http://www.nber.org/natality/ftp.cdc.gov/pub/Health_Statistics/NCHS/Dataset_Documentation/DVS/natality/", + "download": "https://dasl.datadescription.com/download/data/3059", + "filename": "Babysamp", + "name": "Babysamp 98", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Births per 1000 population in the United States, starting in 1965. There has been concern that the birthrate may be declining. A good model for trends in birthrate may allow for some prediction. ", + "download": "https://dasl.datadescription.com/download/data/3075", + "filename": "Birthrates-2015", + "name": "Birthrates 2015", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Demographics" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A statistics professor at a large university polled his students to find out what their majors were and what position they held in the family birth order. 
The results are summarized in the table.", + "download": "https://dasl.datadescription.com/download/data/3076", + "filename": "Birth-order", + "name": "Birth order", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The technology committee at a school has stated that the average time spent by students per lab visit has increased and the increase supports their argument that they need to increase lab fees.\nTo substantiate this claim, the committee randomly sampled 12 student lab visits and noted the amount of time spent using the computer. The times in minutes are given:", + "download": "https://dasl.datadescription.com/download/data/3127", + "filename": "Computer-lab", + "name": "Computer lab fees", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Education" } ] }, { "category_name": "Physics", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Why does the moon appear to be so much larger when it is near the horizon than when it is directly overhead? This question has produced a wide variety of theories from psychologists. An important early hypothesis was put forth by Holway and Boring (1940) who suggested that the illusion was due to the fact that when the moon was on the horizon, the observer looked straight at it with eyes level, whereas when it was at its zenith, the observer had to elevate his or her eyes as well as his or her head to see it. 
To test this hypothesis, Kaufman and Rock (1962) devised an apparatus that allowed them to present two artificial moons, one at the horizon and one at the zenith, and to control whether the subjects elevated their eyes or kept them level to see the zenith moon. The horizon, or comparison, moon was always viewed with eyes level. Subjects were asked to adjust the variable horizon moon to match the size of the zenith moon or vice versa. For each subject the ratio of the perceived size of the horizon moon to the perceived size of the zenith moon was recorded with eyes elevated and with eyes level. A ratio of 1.00 would represent no illusion. If Holway and Boring were correct, there should be a greater illusion in the eyes-elevated condition than in the eyes-level condition.", "download": "http://www.statsci.org/data/general/moon.txt", "filename": "moon", "name": "The Moon Illusion", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the magnitudes of a variable star at midnight on 600 consecutive nights. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nStar\n\nMagnitude on each night\n", "download": "http://www.statsci.org/data/general/star.txt", "filename": "star", "name": "Magnitudes of a Variable Star", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives a sequence of observations on the magnitude of a variable Cepheid star made from the Mount Stromlo Observatory near Canberra in Australia. The observations were made as part of the MACHO project. 
\nThe MACHO project monitors millions of stars every night with a dedicated telescope at Mount Stromlo Observatory. The collaboration is probing the halo of our galaxy in order to detect dark matter in the form of Massive Compact Halo Objects -- MACHOs. These are astronomical bodies that emit negligible visible light, such as dwarf or neutron stars, large planets, and black holes. Detection of a MACHO is achieved by observing its gravitational lensing effect on a chance background star as the MACHO crosses near the line of sight between the observer and this star. In order to detect a sufficiently large number of MACHOs, the project collects observations on a large number of distant stars over an extended period of time. Data are being collected daily over a 4-year period (weather permitting), on approximately 8 million stars in the Large Magellanic Cloud (LMC) and the bulge of the Milky Way. \nThis database is a valuable resource for many other types of astronomical research. It is the most comprehensive catalog of stars in the LMC and contains stars much dimmer than those covered by previous surveys. Temporal coverage is unusually long compared to most star surveys, which permits a comprehensive study of star variability, including long periods and transient phenomena. About 40,000 variable stars have been observed in the LMC and a similar number in the galactic bulge. \nVariable stars are stars for which the intensity of the emitted energy changes over time; for periodic variable stars the change of intensity is periodic over time. Common types of periodic variable stars include eclipsing binaries, RR Lyraes, and Cepheids. Cepheids are very bright stars with periods of 1-70 days. The light curve has an asymmetric shape, and rises more rapidly than it falls. Cepheids with periods of about 1 week tend to have a bump in the descending part of the curve. 
For periods of about 10 days, the bump is at the peak of the curve, and for longer periods it is on the rising part of the curve. The brightness changes are caused by periodic pulsation (contraction and expansion) of the stars and their outer layers. \nThere are numerous additional types of variable stars, and each of the categories above contains subcategories. For example, Beat Cepheids and Beat RR Lyrae oscillate at more than one frequency. Different classes of variable stars can be located in different regions of a plot of magnitude versus temperature or spectral type. For example, RR Lyrae and Cepheids lie on a strip called the \"instability strip.\" Different types of variable curves are classified also on the basis of the shapes of their light curves and the relationships of shapes to period, for example. As well as being important for studies of stellar structure and evolution, these classes are used to determine distances on a cosmic scale by means of the relationship of their periods to their magnitudes. \nObservations of these stars are typically made at rather irregular times, depending on observation schedules and sky conditions. Different observations have differing errors. All this makes determination of the periods and the shapes of the corresponding phased light curves an interesting statistical problem. \nThis particular star is a category 1 Cepheid (magnitude -9.166) at coordinates (1541.5,1395.1). Magnitudes were recorded in the blue band from 4500 to 6300 Angstroms. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nMagnitude\n\nDifferential Magnitude\n\nSD\n\nStandard deviation of the magnitude observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n", "download": "http://www.statsci.org/data/oz/ceph1.txt", "filename": "ceph1", "name": "Magnitudes of Variable Star Cepheid 1", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives observations on the magnitude of a Cepheid variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is Cepheid star number 2. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Angstroms\n\nRedSD\n\nStandard deviation of the red-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\n\n\n", "download": "http://www.statsci.org/data/oz/ceph2.txt", "filename": "ceph2", "name": "Magnitudes of Variable Star Cepheid 2", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives a sequence of observations on the magnitude of an eclipsing binary variable star made from the Mount Stromlo Observatory near Canberra in Australia. 
The observations were made as part of the MACHO project. \nThe MACHO project monitors millions of stars every night with a dedicated telescope at Mount Stromlo Observatory. The collaboration is probing the halo of our galaxy in order to detect dark matter in the form of Massive Compact Halo Objects -- MACHOs. These are astronomical bodies that emit negligible visible light, such as dwarf or neutron stars, large planets, and black holes. Detection of a MACHO is achieved by observing its gravitational lensing effect on a chance background star as the MACHO crosses near the line of sight between the observer and this star. In order to detect a sufficiently large number of MACHOs, the project collects observations on a large number of distant stars over an extended period of time. Data are being collected daily over a 4-year period (weather permitting), on approximately 8 million stars in the Large Magellanic Cloud (LMC) and the bulge of the Milky Way. \nThis database is a valuable resource for many other types of astronomical research. It is the most comprehensive catalog of stars in the LMC and contains stars much dimmer than those covered by previous surveys. Temporal coverage is unusually long compared to most star surveys, which permits a comprehensive study of star variability, including long periods and transient phenomena. About 40,000 variable stars have been observed in the LMC and a similar number in the galactic bulge. \nVariable stars are stars for which the intensity of the emitted energy changes over time; for periodic variable stars the change of intensity is periodic over time. Common types of periodic variable stars include eclipsing binaries, RR Lyraes, and Cepheids. 
Eclipsing binaries consist of two stars orbiting each other in a conformation relative to the observer such that brightness variability occurs as one star passes in front of the other in turn; as the stars may be of different brightness, the drop in light flux depends on which star is in the front. These stars have periods of between 3 hours and 24 years, although 0.5 to 10 days is the most common range. \nThere are numerous additional types of variable stars, and each of the categories above contains subcategories. For example, Beat Cepheids and Beat RR Lyrae oscillate at more than one frequency. Different classes of variable stars can be located in different regions of a plot of magnitude versus temperature or spectral type. For example, RR Lyrae and Cepheids lie on a strip called the \"instability strip.\" Different types of variable curves are classified also on the basis of the shapes of their light curves and the relationships of shapes to period, for example. As well as being important for studies of stellar structure and evolution, these classes are used to determine distances on a cosmic scale by means of the relationship of their periods to their magnitudes. \nObservations of these stars are typically made at rather irregular times, depending on observation schedules and sky conditions. Different observations have differing errors. All this makes determination of the periods and the shapes of the corresponding phased light curves an interesting statistical problem. \nThis particular star is a category 1 eclipsing binary (magnitude -10.26) at coordinates (1617.8, 669.35). Magnitudes were recorded in the blue band from 4500 to 6300 Angstroms. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nMagnitude\n\nDifferential Magnitude\n\nSD\n\nStandard deviation of the magnitude observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n", "download": "http://www.statsci.org/data/oz/ecbi1041.txt", "filename": "ecbi1041", "name": "Magnitudes of an Eclipsing Binary Variable Star", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRab Lyrae star number 1061, a category 1 star with an asymmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Angstroms\n\nRedSD\n\nStandard deviation of the red-band observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n", "download": "http://www.statsci.org/data/oz/rrl1061.txt", "filename": "rrl1061", "name": "Magnitudes of Variable Star RR Lyrae 1061", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRc Lyrae star number 1198, a category 1 star with a symmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Angstroms\n\nRedSD\n\nStandard deviation of the red-band observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n", "download": "http://www.statsci.org/data/oz/rrl1198.txt", "filename": "rrl1198", "name": "Magnitudes of Variable Star RR Lyrae 1198", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRc Lyrae star number 1263, a category 1 star with a symmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Angstroms\n\nRedSD\n\nStandard deviation of the red-band observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n\n\n\n", "download": "http://www.statsci.org/data/oz/rrl1263.txt", "filename": "rrl1263", "name": "Magnitudes of Variable Star RR Lyrae 1263", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Astronomy" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "These data are the result of a study involving the analysis of performance degradation data from accelerated tests. The response variable is dielectric breakdown strength in kilo-volts, and the predictor variables are time in weeks and temperature in degrees Celsius. The study can be viewed as an 8 by 4 factorial experiment. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nStrength\n\nDielectric breakdown strength in kilovolts\n\nTime\n\nDuration of testing in weeks (8 levels)\n\nTemperature\n\nTemperature in degrees Celsius (4 levels)\n", "download": "http://www.statsci.org/data/general/dialectr.txt", "filename": "dialectr", "name": "Dialectric Breakdown Strength", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Electronics" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give time series measurements on waves emanating from a cylinder suspended in a tank of water. The waves are believed to show a high frequency vibration, which is an artifact of the experiment equipment, as well as lower frequency vibration which reflects forces acting on the cylinder. It is of interest to identify and to filter out the high frequency vibration. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nWaves\n\nRelative vertical displacement at equi-spaced times\n", "download": "http://www.statsci.org/data/general/waves.txt", "filename": "waves_", "name": "Forces on a Cylinder Suspended in Water", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Scientist Robert Boyle examined the relationship between the volume in which a gas is contained and the pressure in its container. He used a cylindrical container with a moveable top that could be raised or lowered to change the volume. He measured the Height in inches by counting equally spaced marks on the cylinder, and", + "download": "https://dasl.datadescription.com/download/data/3083", + "filename": "Boyle", + "name": "Boyle", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "Other" } ] }, { "category_name": "Chemistry", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the concentrations at equi-spaced times of an intermediate compound during a chemical experiment involving a catalyst. The experiment was conducted in the Department of Chemistry at the Australian National University. The compound is produced exponentially during the first stage of the experiment and then is consumed exponentially during the second stage. Theoretically the process can be described by a compartment model, and the expected concentration of the compound over time is described by a second order differential equation. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nConcentration\n\nConcentration of intermediate compound\n", "download": "http://www.statsci.org/data/oz/sargeson.txt", "filename": "sargeson", "name": "Chemical Experiment with Catalyst", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "This dataset is distributed with S-Plus as the dataframe ethanol. \nThe engine exhaust was analysed in an experiment in which ethanol was burned in a single cylinder automobile test engine. The response variable is NOx, the concentration of nitric oxide (NO) and nitrogen dioxide (NO2) in the engine exhaust, normalized by the work done by the engine. The explanatory variables are the compression ratio of the engine and the equivalence ratio at which the engine was run - a measure of the richness of the air/ethanol mix. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nNOx\n\nConcentration of nitric oxide (NO) and nitrogen dioxide (NO2)\n\nCompression\n\nCompression ratio\n\nEquivalence\n\nEquivalence ratio\n", "download": "http://www.statsci.org/data/general/ethanol.txt", "filename": "ethanol", "name": "Exhaust from Burning Ethanol", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3112", + "filename": "Chromatography", + "name": "Chromatography", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true } ], "subcategory_name": "General" } ] } ], "collection_name": "StatSci_Datasets" } diff --git a/src/backend/datasources/DatasetHandler.cpp b/src/backend/datasources/DatasetHandler.cpp index 839417883..f11df876c 100644 --- a/src/backend/datasources/DatasetHandler.cpp +++ b/src/backend/datasources/DatasetHandler.cpp @@ -1,307 +1,313 @@ /*************************************************************************** File : DatasetHandler.cpp Project : LabPlot Description : Processes a dataset's metadata file -------------------------------------------------------------------- Copyright : (C) 2019 Kovacs Ferencz (kferike98@gmail.com) ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 51 Franklin Street, Fifth Floor, * * Boston, MA 02110-1301 USA * * * ***************************************************************************/ #include "backend/datasources/filters/AsciiFilter.h" #include "backend/datasources/DatasetHandler.h" #include #include #include #include #include #include #include #include #include #include #include /*! \class DatasetHandler \brief Provides functionality to process a metadata file of a dataset, configure a spreadsheet and filter based on it, download the dataset and load it into the spreadsheet. \ingroup datasources */ DatasetHandler::DatasetHandler(Spreadsheet* spreadsheet) : m_spreadsheet(spreadsheet), m_filter(new AsciiFilter), m_object(nullptr), m_downloadManager(new QNetworkAccessManager) { connect(m_downloadManager, &QNetworkAccessManager::finished, this, &DatasetHandler::downloadFinished); connect(this, &DatasetHandler::downloadCompleted, this, &DatasetHandler::processDataset); } DatasetHandler::~DatasetHandler() { delete m_downloadManager; delete m_filter; } /** * @brief Initiates processing the metadata file,, located at the given path, belonging to a dataset. 
* @param path the path to the metadata file */ void DatasetHandler::processMetadata(const QJsonObject& object, const QString& path) { m_object = new QJsonObject(object); qDebug("Start processing dataset..."); m_containingDir = path.left(path.lastIndexOf(QDir::separator())); qDebug() << m_containingDir; if(!m_object->isEmpty()) { configureFilter(); configureSpreadsheet(); prepareForDataset(); } } /** * @brief Marks the metadata file being invalid by setting the value of a flag, also pops up a messagebox. */ void DatasetHandler::markMetadataAsInvalid() { m_invalidMetadataFile = true; QMessageBox::critical(0, "Invalid metadata file", "The metadata file for the choosen dataset is invalid!"); } /** * @brief Configures the filter, that will be used later, based on the metadata file. */ void DatasetHandler::configureFilter() { qDebug("Configure filter"); if(!m_object->isEmpty()) { if(m_object->contains("separator")) m_filter->setSeparatingCharacter(m_object->value("separator").toString()); else markMetadataAsInvalid(); if(m_object->contains("comment_character")) m_filter->setCommentCharacter(m_object->value("comment_character").toString()); else markMetadataAsInvalid(); if(m_object->contains("create_index_column")) m_filter->setCreateIndexEnabled(m_object->value("create_index_column").toBool()); else markMetadataAsInvalid(); if(m_object->contains("skip_empty_parts")) m_filter->setSkipEmptyParts(m_object->value("skip_empty_parts").toBool()); else markMetadataAsInvalid(); if(m_object->contains("simplify_whitespaces")) m_filter->setSimplifyWhitespacesEnabled(m_object->value("simplify_whitespaces").toBool()); else markMetadataAsInvalid(); if(m_object->contains("remove_quotes")) m_filter->setRemoveQuotesEnabled(m_object->value("remove_quotes").toBool()); else markMetadataAsInvalid(); if(m_object->contains("use_first_row_for_vectorname")) m_filter->setHeaderEnabled(m_object->value("use_first_row_for_vectorname").toBool()); else markMetadataAsInvalid(); 
if(m_object->contains("number_format")) m_filter->setNumberFormat(QLocale::Language(m_object->value("number_format").toInt())); else markMetadataAsInvalid(); if(m_object->contains("DateTime_format")) m_filter->setDateTimeFormat(m_object->value("DateTime_format").toString()); else markMetadataAsInvalid(); } else { qDebug() << "Empty object"; markMetadataAsInvalid(); } } /** * @brief Configures the spreadsheet based on the metadata file. */ void DatasetHandler::configureSpreadsheet() { qDebug("Conf spreadsheet"); if(!m_object->isEmpty()) { if(m_object->contains("name")) m_spreadsheet->setName( m_object->value("name").toString()); else markMetadataAsInvalid(); if(m_object->contains("description")) m_spreadsheet->setComment(m_object->value("description").toString()); } else { markMetadataAsInvalid(); } } /** * @brief Extracts the download URL of the dataset and initiates the process of download. */ void DatasetHandler::prepareForDataset() { qDebug("Start downloading dataset"); if(!m_object->isEmpty()) { if(m_object->contains("download")) { const QString& url = m_object->value("download").toString(); const QUrl downloadUrl = QUrl::fromEncoded(url.toLocal8Bit()); doDownload(url); } else { QMessageBox::critical(0, i18n("Invalid metadata file"), i18n("There is no download URL present in the metadata file!")); } } else { markMetadataAsInvalid(); } } /** * @brief Starts the download of the dataset. 
* @param url the download URL of the dataset */ void DatasetHandler::doDownload(const QUrl& url) { qDebug("Download request"); QNetworkRequest request(url); m_currentDownload = m_downloadManager->get(request); connect(m_currentDownload, &QNetworkReply::downloadProgress, [this] (qint64 bytesReceived, qint64 bytesTotal) { double progress; if (bytesTotal == -1) progress = 0; else progress = 100 * (static_cast(bytesReceived) / static_cast(bytesTotal)); qDebug() << "Progress: " << progress; emit downloadProgress(progress); }); } /** * @brief Called when the download of the dataset is finished. */ void DatasetHandler::downloadFinished(QNetworkReply* reply) { qDebug("Download finished"); const QUrl& url = reply->url(); if (reply->error()) { qDebug("Download of %s failed: %s\n", url.toEncoded().constData(), qPrintable(reply->errorString())); } else { if (isHttpRedirect(reply)) { qDebug("Request was redirected.\n"); } else { QString filename = saveFileName(url); if (saveToDisk(filename, reply)) { qDebug("Download of %s succeeded (saved to %s)\n", url.toEncoded().constData(), qPrintable(filename)); m_fileName = filename; emit downloadCompleted(); } } } m_currentDownload = nullptr; reply->deleteLater(); } /** * @brief Checks whether the GET request was redirected or not. */ bool DatasetHandler::isHttpRedirect(QNetworkReply* reply) { const int statusCode = reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt(); // TODO enum/defines for status codes ? return statusCode == 301 || statusCode == 302 || statusCode == 303 || statusCode == 305 || statusCode == 307 || statusCode == 308; } /** * @brief Returns the name and path of the file that will contain the content of the reply (based on the URL). 
* @param url */ QString DatasetHandler::saveFileName(const QUrl& url) { const QString path = url.path(); - QString basename = QFileInfo(path).fileName(); + + //get the extension of the downloaded file + const QString downloadFileName = QFileInfo(path).fileName(); + int lastIndex = downloadFileName.lastIndexOf("."); + const QString fileExtension = lastIndex >= 0 ? downloadFileName.right(downloadFileName.length() - lastIndex) : ""; + + QString basename = m_object->value("filename").toString() + fileExtension; if (basename.isEmpty()) basename = "download"; QString fileName = m_containingDir + QDir::separator() + basename; - + QFileInfo fileInfo (fileName); if (QFile::exists(fileName)) { - // already exists, don't overwrite - int i = 0; - fileName += '.'; - while (QFile::exists(fileName + QString::number(i))) - ++i; - fileName += QString::number(i); + if(fileInfo.lastModified().addDays(1) < QDateTime::currentDateTime()){ + QFile removeFile (fileName); + removeFile.remove(); + } else { + qDebug() << "Dataset file already exists, no need to download it again"; + } } return fileName; } /** * @brief Saves the content of the network reply to the given path under the given name. */ bool DatasetHandler::saveToDisk(const QString& filename, QIODevice* data) { QFile file(filename); if (!file.open(QIODevice::WriteOnly)) { qDebug("Could not open %s for writing: %s\n", qPrintable(filename), qPrintable(file.errorString())); return false; } file.write(data->readAll()); file.close(); return true; } /** * @brief Processes the downloaded dataset with the help of the already configured filter. */ void DatasetHandler::processDataset() { m_filter->readDataFromFile(m_fileName, m_spreadsheet); configureColumns(); } /** * @brief Configures the columns of the spreadsheet, based on the metadata file. 
*/ void DatasetHandler::configureColumns() { if(!m_object->isEmpty()) { int index = 0; const int columnsCount = m_spreadsheet->columnCount(); while(m_object->contains(i18n("column_description_%1", index)) && (index < columnsCount)) { m_spreadsheet->column(index)->setComment(m_object->value(i18n("column_description_%1", index)).toString()); ++index; } } else { qDebug("Invalid Json document"); } }