diff --git a/data/datasets/OzDASL.json b/data/datasets/DASL.json similarity index 51% copy from data/datasets/OzDASL.json copy to data/datasets/DASL.json index effd5e624..2452fb8a3 100644 --- a/data/datasets/OzDASL.json +++ b/data/datasets/DASL.json @@ -1,9965 +1,6130 @@ { "categories": [ { "category_name": "Medicine", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "West of Tokyo lies a large alluvial plain, dotted by a network of farming villages. Matui (1968) analysed the position of the 911 houses making up one of those villages. The area studied was a rectangle, 3 km by 4 km. A grid was superimposed over a map of the village, dividing its 12 square kilometres into 1200 plots, each 100 metres on a side. The numbers of houses on each of those plots are recorded in a 30 by 40 matrix of data.", - "download": "http://www.statsci.org/data/general/matui.txt", - "filename": "matui", - "name": "Position of Houses in a Japanese Farming Village", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": false, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Larsen and Marx (1986) write \nSince Word War II, plutonium for use in atomic weapons has been produced at an Atomic Energy Commission facility in Hanford, Washington. One of the major safety problems encountered there has been the storage of radioactive wastes. Over the years, significant quantities of these substances - including strontium 90 and cesium 137 - have leaked from their open-pit storage areas into the nearby Columbia River, which flows along the Washington-Oregon border, and eventually empties into the Pacific Ocean. 
\nTo measure the health consequences of this contamination, an index of exposure was calculated for each of the nine Oregon counties having frontage on either the Columbia River or the Pacific Ocean. This particular index was based on several factors, including the county's stream distance from Hanford and the average distance of its population from any water frontage. As a covariate, the cancer mortality rate was determined for each of these same counties. \nThe data give the index of exposure and the cancer mortality rate during 1959-1964 for the nine Oregon counties affected. Higher index values represent higher levels of contamination. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCounty\n\nName of county\n\nExposure\n\nIndex of exposure\n\nMortality\n\nCancer mortality per 100,000 man-years\n\n\n\n", - "download": "http://www.statsci.org/data/general/hanford.txt", - "filename": "hanford", - "name": "Cancer Mortality near Hanford Reactor", + "description": "A group of female college students took a test that measured their verbal IQs and also underwent an MRI scan to measure the size of their brains (in 1000s of pixels)", + "download": "https://dasl.datadescription.com/download/data/3084", + "filename": "Brain-size", + "name": "Brain size", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data show the incidence of nonmelanoma skin cancer among women in Minneapolis-St Paul, Minnesota, and Dallas-Fort Worth, Texas. The towns are coded 0 for St Paul and 1 for Forth Worth. 
\nOne would expect sun exposure to be greater in Texas than in Minnesota.", - "download": "http://www.statsci.org/data/general/skin.txt", - "filename": "skin", - "name": "Skin Cancer in Texas and Minnesota", + "description": "An experiment was performed to see whether sensory deprivation over an extended period of time has any effect on the alpha-wave patterns produced by the brain. To determine this, 20 subjects, inmates in a Canadian prison, were randomly split into two groups. Members of one group were placed in solitary confinement. Those in the other […] ", + "download": "https://dasl.datadescription.com/download/data/3085", + "filename": "Brain-waves", + "name": "Brain waves", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data comes from an experiment to measure the mortality of cancer cells under radiation under taken in the Department of Radiology, University of Cape Town. Four hundred cells were placed on a dish, and three dishes were irradiated at a time, or occasion. After the cells were irradiated, the surviving cells were counted. Since cells would also die naturally, dishes with cells were put into the radiation chamber without being irradiated, to establish the natural mortality. This data gives only these zero-dose data. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nOccasion\n\nIrradiation occasion (1-27)\n\nSurvived\n\nNumber of cells surviving out of 400 placed on dish\n", - "download": "http://www.statsci.org/data/general/radiatio.txt", - "filename": "radiatio", - "name": "Mortality of Cancer Cells", + "description": "A study examined brain size (measured as pixels counted in a digitized magnetic resonance image [MRI] of a cross section of the brain) and IQ (4 performance scales of the Wechsler IQ test) for college students. 
The data give the Performance IQ scores and Brain Size. ", + "download": "https://dasl.datadescription.com/download/data/3301", + "filename": "IQ-Brain", + "name": "IQ Brain", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Neurology" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The readabilities of 30 pamphlets about cancer are compared to the\nreading comprehension levels of 63 patients with cancer. Both\nvariables are measured in grade levels. The data are presented as\nfrequencies of occurrence over grade levels for both the pamphlet\nreadabilities and the reading levels of the patients.\n\nVARIABLE DESCRIPTIONS:\nColumns\n1 - 2 Grade level \n4 - 5 Frequency of occurrence for brochure readabilities\n7 - 8 Frequency of occurrence for patient reading levels", - "download": "http://jse.amstat.org/datasets/readability.dat.txt", - "filename": "readability", - "name": "Readability of Educational Materials for Patients with Cancer", + "description": "The Framingham Heart Study is one of the longest running health studies. 
It has followed original subjects, their children, and their grandchildren, looking for factors that affect cardiac health.\nThese data only include\nsubjects whose cholesterol was measured in the first exam.\nSource: “Statistical Methods in Epidemiology” by H.A.Kahn and C.T.Sempos\nSBP: Systolic blood pressure at first exam\nDBP: Diastolic blood pressure at first exam\nCHOL: Serum cholesterol at first exam\nFRW : Framingham relative weight; a standardized measure of weight adjusted for sex and height\nCIG: Number of cigarettes smoked/day at first exam\nDEATH: First biannual exam missed due to death; 0=“alive at tenth biannual exam.” (This exam was given in the 18th year of the study.)\nCAUSE: 0=alive at exam 10, 1=Coronary Heart Disease (sudden), 2=CHD (not sudden), 3=Stroke, 4=Other cardiovascular disease, 5=cancer, 6=other", + "download": "https://dasl.datadescription.com/download/data/3217", + "filename": "Framingham", + "name": "Framingham", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch14.dat contains the following 19 variables:\n\nPatient ID \nDate on study (MMDDYY)\nTreatment arm (D= daunorubicin, I= idarubicin)\nSex (M= male, F= female)\nAge (years)\nFAB classification (1 - 6)\nKarnofsky score (0 - 100) \nBaseline white blood cells (in thousands per cubic millimeter)\nBaseline platelets (in thousands per cubic millimeter)\nBaseline hemoglobin (g/dl)\nEvaluable (Y= yes, N= no)\nComplete remission (CR) (Y= yes, N= no)\nCourses of chemotherapy to CR\nDate of CR (MMDDYY)\nDate of last follow-up (MMDDYY)\nStatus at last follow-up (D= dead, A= alive)\nBone marrow transplant (Y= yes, N= no)\nDate of bone marrow transplant (MMDDYY)\nInclusion in June 30, 1988 
analysis (Y= yes, N= no)", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch14.dat", - "filename": "Leukemia-Trial", - "name": "Interpretation of a Leukemia Trial Stopped Early", + "description": "number of days spent in hospital by patients admitted to hospitals in New York during one year with a primary diagnosis of acute myocardial infarction (heart attack). Data are from public medicare records. Consider the distribution of stays. The data also include the age and sex of the patient and the diagnostic (DRG) code. \n", + "download": "https://dasl.datadescription.com/download/data/3263", + "filename": "Heart-attack-charges", + "name": "Heart attack charges", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch15.dat contains the following variables:\n\n Patient ID : Integer\n \n Institution : 0 - Memorial Sloan-Kettering,\n 1 - Mayo Clinic,\n 2 - John Hopkins.\n Group : 1 - Study,\n 0 - Control.\n\n Means of Detection : 0 - Routine Cytology,\n 1 - Routine X-ray,\n 2 - Both X-ray and Cytology,\n 3 - Interval.\n\n Cell Type : 0 - Epidermoid,\n 1 - Adenocarcinoma,\n 2 - Large Cell,\n 3 - Oat Cell,\n 4 - Other.\n Stage : 4 digits, 1st digit (1,2,3) - overall stage,\n 2nd digit (1,2,3) - tumor,\n 3rd digit (0,1,2) - lymph nodes\n 4th digit (0,1) - distant metastases\n Operated : 1 - yes,\n 0 - no.\n Survival : Integer - Days from detection to last date known alive.\n Survival Category : 0 - Alive,\n 1 - Dead of lung cancer,\n 2 - Dead of other causes.\n\n Missing values - '-'.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch15.dat", - "filename": "Lung-Cancer", - "name": "Early Lung Cancer Detection Studies", + "description": "Number of days spent in hospital by 
female patients admitted to hospitals in New York during one year with a primary diagnosis of acute myocardial infarction (heart attack). Data are from public medicare records. Consider the distribution of stays. The data also include the age of the patient", + "download": "https://dasl.datadescription.com/download/data/3264", + "filename": "Heart-attack-stays", + "name": "Heart attack stays", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch16a.dat contains extent of scleral extension\n(extent to which the tumor has invaded the sclera or \"white of the eye\")\nas coded by two raters for each of 885 eyes. There is one record for each\neye; the first field of each record contains a patient identifier, the\nsecond field contains the code for scleral extension assigned by rater A,\nand the third field contains the code for scleral extension assigned by\nrater B. 
The coding scheme is:\n\n1=None or innermost layers\n2=Within sclera, but does not extend to scleral surface\n3=Extends to scleral surface\n4=Extrascleral extension without transection\n5=Extrascleral extension with presumed residual tumor in the orbit\n\nThe collaborative Ocular Melanoma Study (COMS) owns the\ncopyright to this dataset; these data are considered preliminary due\nto the ongoing nature of the COMS clinical trials.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch16a.dat", - "filename": "Choroidal-Melanoma", - "name": "Modeling Interrater Agreement for Pathological Features of Choroidal Melanoma", + "description": "A medical researcher measured the pulse rates (beats per minute) of a sample of randomly selected adults.", + "download": "https://dasl.datadescription.com/download/data/3413", + "filename": "Pulse-rates", + "name": "Pulse rates", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Cardiology" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch16b.dat contains the degree of necrosis (tissue\ndeath) data for 3 raters. The first field contains a patient identifier,\nand the second, third, and fourth fields contain the code for degree of\nnecrosis as assigned by raters A, B, and C, respectively. 
The coding\nscheme is:\n\n1=None\n2=Less than 10% of cells\n3=Greater than or equal to 10% of cells\n\n\nThe collaborative Ocular Melanoma Study (COMS) owns the\ncopyright to this dataset; these data are considered preliminary due\nto the ongoing nature of the COMS clinical trials.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch16b.dat", - "filename": "Choroidal-Melanoma-2", - "name": "Modeling Interrater Agreement for Pathological Features of Choroidal Melanoma", + "description": "Does blood pressure, on average, change with age? The data here are two categorical variables: Blood pressure categorized as High, Normal, Low, and Age categorized as under 30, 30-49, and over 50.", + "download": "https://dasl.datadescription.com/download/data/3077", + "filename": "Blood-Pressure", + "name": "Blood Pressure", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "These data remains the copyright of the Harris Birthright Research Unit\nof the University of Aberdeen, UK. 
It may be used freely for\nnon-commercial purposes and can be freely distributed provided its\nsource is acknowledged.\n\nThe file ch18a.dat contains the following individual-specific variables:\n\nVariable Coding\nControl/patient code 0=control, 1=patient\nStudy number 1-500 for each group\nNumber of smears 1-15\nBiopsy result 0=negative, 1=positive \n 9=missing (no biopsy)\t\nNumber of days from 0-840 if biopsy done, \nlast smear to biopsy -1 if no biopsy", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch18a.dat", - "filename": "Cervical-Cancer", - "name": "Modeling the Precursors of Cervical Cancer", + "description": "Thirteen overweight women volunteered for a study to determine whether eating specially prepared crackers before a meal could help them lose weight. The subjects were randomly assigned to eat crackers with different types of fiber (bran fiber, gum fiber, both, and a control cracker) and cycled through several of the cracker alternatives. Unfortunately, some of the women developed uncomfortable bloating and upset stomachs. Researchers suspected that some of the crackers might be at fault. The study was paid for by the manufacturers of the gum fiber, who hoped this would be a new diet tool. What would you recommend to them about the prospects for marketing their new diet cracker?", + "download": "https://dasl.datadescription.com/download/data/3163", + "filename": "Diet", + "name": "Diet", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "These data remains the copyright of the Harris Birthright Research Unit\nof the University of Aberdeen, UK. 
It may be used freely for\nnon-commercial purposes and can be freely distributed provided its\nsource is acknowledged.\n\nThe file ch18a.dat contains the following smear-specific variables:\n\nVariable Coding \nControl/patient code 0=control, 1=patient \nStudy number 1-500 for each group \nSmear number 1-15 \nSmear grade 0=negative, 1=positive \nInterval in days 0-3733, 0 if 1st smear \nsince last smear", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch18b.dat", - "filename": "Cervical-Cancer\n", - "name": "Modeling the Precursors of Cervical Cancer\n", + "description": "Medical researchers followed 6272 Swedish men for 30 years to see whether there\nwas any association between the amount of fish in their diet and prostate cancer. The original study actually used pairs of twins, which enabled the researchers to discern that the risk of cancer for those who never ate fish actually was substantially greater.", + "download": "https://dasl.datadescription.com/download/data/3207", + "filename": "Fish-diet", + "name": "Fish diet", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Oncology" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data gives the time between 800 successive pulses along a nerve fibre. There are 799 observations rounded to the nearest half in units of 1/50 second. 
", - "download": "http://www.statsci.org/data/general/nerve.txt", - "filename": "nerve", - "name": "Time between Nerve Pulses", - "number_format": 31, - "remove_quotes": true, - "separator": "auto", - "simplify_whitespaces": true, - "skip_empty_parts": false, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A group of female college students took a test that measured their verbal IQs and also underwent an MRI scan to measure the size of their brains (in 1000s of pixels)", - "download": "https://dasl.datadescription.com/download/data/3084", - "filename": "Brain-size", - "name": "Brain size", + "description": "A student decided to investigate just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods—washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). Her experiment consisted of one experimental factor, the washing Method, at four levels.\nShe suspected that the number of bacteria on her hands before washing might vary considerably from day to day. To help even out the effects of those changes, she generated random numbers to determine the order of the four treatments. Each morning, she washed her hands according to the treatment randomly chosen. Then she placed her right hand on a sterile media plate designed to encourage bacteria growth. She incubated each plate for 2 days at 36°C, after which she counted the bacteria colonies. 
She replicated this procedure 8 times for each of the four treatments.", + "download": "https://dasl.datadescription.com/download/data/3254", + "filename": "Hand-washing", + "name": "Hand washing", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "An experiment was performed to see whether sensory deprivation over an extended period of time has any effect on the alpha-wave patterns produced by the brain. To determine this, 20 subjects, inmates in a Canadian prison, were randomly split into two groups. Members of one group were placed in solitary confinement. Those in the other […] ", - "download": "https://dasl.datadescription.com/download/data/3085", - "filename": "Brain-waves", - "name": "Brain waves", + "description": "The heights and weights of students in a statistics class were recorded. ", + "download": "https://dasl.datadescription.com/download/data/3265", + "filename": "Heights-weights", + "name": "Heights and weights", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A study examined brain size (measured as pixels counted in a digitized magnetic resonance image [MRI] of a cross section of the brain) and IQ (4 performance scales of the Wechsler IQ test) for college students. The data give the Performance IQ scores and Brain Size. 
", - "download": "https://dasl.datadescription.com/download/data/3301", - "filename": "IQ-Brain", - "name": "IQ Brain", + "description": "Canadian researcher John Coates took saliva samples in\nthe morning, twice a day for eight days, from 17 men working on a London\nmid-size trading f loor (trading a wide range of assets, with largest exposure to\nGerman interest rate futures), in June 2005, and classified each trader according\nto whether his testosterone level was high or low on that day (compared\nwith the trader’s median over the period). High testosterone days differed from\ntrader to trader, and high days differed from low days on average by 25% in\ntestosterone level. He also recorded the profits or losses (P&L) in pounds sterling\nof each trader during 11 am–4 pm daily.", + "download": "https://dasl.datadescription.com/download/data/3272", + "filename": "Hormones", + "name": "Hormones", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - } - ], - "subcategory_name": "Neurology" - }, - { - "datasets": [ + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Students in an introductory statistics class (MS212 taught by Professor John Eccleston and Dr Richard Wilson at The University of Queensland) participated in a simple experiment. The students took their own pulse rate. They were then asked to flip a coin. If the coin came up heads, they were to run in place for one minute. Otherwise they sat for one minute. Then everyone took their pulse again. The pulse rates and other physiological and lifestyle data are given in the data. \nFive class groups between 1993 and 1998 participated in the experiment. 
The lecturer, Richard Wilson, was concerned that some students would choose the less strenuous option of sitting rather than running even if their coin came up heads, so in the years 1995-1998 a different method of random assignment was used. In these years, data forms were handed out to the class before the experiment. The forms were pre-assigned to either running or non-running and there were an equal number of each. In 1995 and 1998 not all of the forms were returned so the numbers running and sitting was still not entirely controlled.", - "download": "http://www.statsci.org/data/oz/ms212.txt", - "filename": "ms212", - "name": "Pulse Rates before and after Exercise", + "description": "Since the 1960s, the Centers for Disease Control and Prevention’s National Center for Health Statistics has been collecting health and nutritional information on people of all ages and backgrounds. The National Health and Nutrition Examination Survey (NHANES) of 2001–2002, measured a wide variety of variables, including body measurements, cardiovascular fitness, blood chemistry, and demographic information on more than 11,000 individuals.\nThe file holds data on the weights of 80 men between 19 and 24 years old of average height (between 5′8″ and 5′10″ tall).", + "download": "https://dasl.datadescription.com/download/data/3337", + "filename": "Mens-Weights", + "name": "Mens Weights", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A study was conducted at a major north eastern American medical centre regarding blood cholesterol levels and heart-attack incidents. A total of 28 heart-attack patients had their cholesterol levels measured two days, 4 days, and 14 days after the attack. 
In addition, cholesterol levels were recorded for a control group of 30 people who had not had a heart attack. The units of cholesterol measurement are not given in the original reference but are presumably mg/dL of blood.", - "download": "http://www.statsci.org/data/general/cholestg.txt", - "filename": "cholestg", - "name": "Cholesterol Levels after Heart Attack", + "description": "In 1879, A. A. Michelson made 100 determinations of the velocity\nof light in air using a modification of a method proposed by the French\nphysicist Foucault. The data are given here as reported by Stigler.\nThe measurements are derived from sets of often widely disparate\nnumbers of observations. The numbers are in km/sec, and have had\n299,000 subtracted from them. The currently accepted “true”\nvelocity of light in vacuum is 299,792.5 km/sec. Stigler has\napplied the corrections used by Michelson and reports that the\n“true” value appropriate for comparison to these measurements\nis 734.5. Each trial may be a summary of several experimental\nobservations.", + "download": "https://dasl.datadescription.com/download/data/3338", + "filename": "Michelson_", + "name": "Michelson", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This was a pilot study for the experiment described in Recovery of Patients from Stroke. The purpose of the study was to compare four evaluation tools for assessing the recovery of patients who had recently suffered a stroke. The four tools were (1) the Goteburg Assessment Form of Hemiplegia, (2) the Bobath Assessment Form, (3) the Barthel Index and (4) the Kenny Scoring System. 
The Goteburg Assessment was divided into seven components measuring motor function and balance, some sensation qualities, passive range of motion and occurrence of joint pain. The Bobath from evaluates three areas of motor performance, postural reactions, voluntary movement, and balance and automatic protective reactions. The Barthel index and the Kenny Scoring system evaluate ability to carry out activities of daily living such as dressing, feeding, toileting etc. \nTwenty subjects were selected from two large public hospitals in Brisbane. All subjects had recently suffered a cerebrovascular accident resulting in hemiplegia lasting at least 24 hours, had not previously been incapacitated from stroke or other disease and were currently receiving occupational therapy. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject ID (1-20)\n\nSex\n\nMale (M) or female (F)\n\nSide\n\nSide of brain affected, left (L) or right (R)\n\nAge\n\nAge of subject in years\n\nLapse\n\nTime since occurrence of stroke in weeks\n\nArms\n\nArm and shoulder motor function (max 36)\n\nLegs\n\nLower limb motor function (max 30)\n\nHands\n\nWrist and hand motor function (max24)\n\nBalance\n\nBalance score (max 14)\n\nSensation\n\nSensation score (max 24)\n\nJointPain\n\nFreedom from joint pain (max 24)\n\nJointMotion\n\nPassive joint motion (max 24)\n\nBobath\n\nTotal of Bobath Assessment Form (max 266)\n\nBarthel\n\nBarthel Index (max 100)\n\nKenny\n\nKenny scoring system of dailing living (max 24)\n\n\n\n\nThe researcher chose the Barthel Index and the first five components of the Goteburg Evaluation for use in the later experiment.", - "download": "www.statsci.org/data/oz/strokeass.txt", - "filename": "strokeass", - "name": "Evaluation Tools for Stroke Rehabilitation", + "description": "The National Health and Nutrition Examination Survey (NHANES) is a program of studies designed to assess the health and nutritional status of adults and children in the United States. 
The survey is unique in that it combines interviews and physical examinations. ", + "download": "https://dasl.datadescription.com/download/data/3365", + "filename": "NHANES", + "name": "NHANES", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This study compared three occupational therapy programs designed to help patients recover from the effects of a stroke. Eight stroke patients were assigned to each of the three treatment groups. The first group (E) was given an experimental program developed by the investigator from a model of intervention for stroke rehabilitation. The second group (F) was given a pre-existing program. The third group (G) was a non-treatment program. Each program lasted for 8 weeks. All subjects were evaluated at the start of the program and at weekly intervals until the next of the program. \nGroup E and F patients were treated in the Occupational Therapy Department of a large Brisbane repatriation hospital. Group G patients were located in the wards of a large State Hospital in Brisbane. \nThe recovery status of each subject at each time was evaluated using the Goteburg Evaluation of Hemiplegia and the Barthel Index. The Goteburg evalation form gave separate scores for three motor function variables (upper limbs, hand and wrist, lower limbs) and for balance while the Barthel Index gave a single overall score. Higher scores indicate better functional ability. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject ID\n\nGroup\n\nExperimental (E), pre-existing (F) or non-treatment (G)\n\nSex\n\nMale (M) or female (F)\n\nSide\n\nSide of brain affected, left (L) or right (R)\n\nAge\n\nAge of subject in years\n\nLapse\n\nTime lapse from stroke to start of program in weeks\n\nUE1\n\nUpper extremities score (out of 36) at week 1\n\nUE2\n\n... week 2\n\nUE3\n\n... week 3\n\nUE4\n\n... week 4\n\nUE5\n\n... week 5\n\nUE6\n\n... week 6\n\nUE7\n\n... week 7\n\nUE8\n\n... week 8\n\nHW1\n\nHand-wrist score (out of 24) at week 1\n\nHW2\n\n... week 2\n\nHW3\n\n... week 3\n\nHW4\n\n... week 4\n\nHW5\n\n... week 5\n\nHW6\n\n... week 6\n\nHW7\n\n... week 7\n\nHW8\n\n... week 8\n\nLE1\n\nLower extremities score (out of 30) at week 1\n\nLE2\n\n... week 2\n\nLE3\n\n... week 3\n\nLE4\n\n... week 4\n\nLE5\n\n... week 5\n\nLE6\n\n... week 6\n\nLE7\n\n... week 7\n\nLE8\n\n... week 8\n\nBal1\n\nBalance score (out of 14) at week 1\n\nBal2\n\n... week 2\n\nBal3\n\n... week 3\n\nBal4\n\n... week 4\n\nBal5\n\n... week 5\n\nBal6\n\n... week 6\n\nBal7\n\n... week 7\n\nBal8\n\n... week 8\n\nBart1\n\nBarthel Index score (out of 100) at week 1\n\nBart2\n\n... week 2\n\nBart3\n\n... week 3\n\nBart4\n\n... week 4\n\nBart5\n\n... week 5\n\nBart6\n\n... week 6\n\nBart7\n\n... week 7\n\nBart8\n\n... week 8\n", - "download": "http://www.statsci.org/data/oz/stroke.txt", - "filename": "stroke", - "name": "Recovery of Patients from Stroke", + "description": "Body temperatures of a random sample of 52 healthy adults, reported in degrees Fahrenheit. 
", + "download": "https://dasl.datadescription.com/download/data/3368", + "filename": "Normal-temperature", + "name": "Normal temperature", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Framingham Heart Study is one of the longest running health studies. It has followed original subjects, their children, and their grand children, looking for factors that affect cardiac health.\nThese data only include\nsubjects whose cholesterol was measured in the first exam.\nSource: “Statistical Methods in Epidemiology” by H.A.Kahn and C.T.Sempos\nSBP: Systolic blood pressure at first exam\nDBP: Diastolic blood pressure at first exam\nCHOL: Serum choloesterol at first exam\nFRW : Framingham relative weight; a standardized measure of weight adjusted for sex and height\nCIG: Number of cigarettes smoked/day at first exam\nDEATH: First biannual exam missed due to death; 0=”alive at tenth biannual exam.” (This exam wasgiven in the 18th year of the study.)\nCAUSE: 0=aliv e at exam 10, 1=Coronary Heart Disease (sudden), 2=CHD (not sudden), 3=Stroke,4=Other cardiovascular disease, 5=cancer, 6=other", - "download": "https://dasl.datadescription.com/download/data/3217", - "filename": "Framingham", - "name": "Framingham", + "description": "Obesity and exercise", + "download": "https://dasl.datadescription.com/download/data/3372", + "filename": "Obesity-and-exercise", + "name": "Obesity and exercise", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "number of days spent in hospital by patients admitted to hospitals in New York during one year with a 
primary diagnosis of acute myocardial infarction (heart attack). Data are from public medicare records. Consider the distribution of stays. The data also include the age and sex of the patient and the diagnostic (DRG) code. \n", - "download": "https://dasl.datadescription.com/download/data/3263", - "filename": "Heart-attack-charges", - "name": "Heart attack charges", + "description": "Story: \nThe Pima Indians of southern Arizona are a unique community. Their ancestors were among the first people to cross over into the Americas some 30,000 years ago. For at least two millennia, they have lived in the Sonoran Desert near the Gila River. Known throughout history as a generous people, they have given of themselves for the past 30 years helping researchers at the National Institutes of Health study certain diseases like diabetes and obesity. Young Pima Indians often marry other Pimas, making them an ideal group for genetic researchers to study. Pimas also have an extremely high incidence of diabetes.\nResearchers investigating factors for increased risk of diabetes examined data on 768 adult women of Pima Indian heritage. One possible predictor is the body mass index, BMI, calculated as weight/height², where weight is measured in kilograms and height in meters. We are interested in the relationship between BMI and the incidence of diabetes. ", + "download": "https://dasl.datadescription.com/download/data/3394", + "filename": "Pima-indians", + "name": "Pima indians", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Number of days spent in hospital by female patients admitted to hospitals in New York during one year with a primary diagnosis of acute myocardial infarction (heart attack). Data are from public medicare records. 
Consider the distribution of stays. The data also include the age of the patient", - "download": "https://dasl.datadescription.com/download/data/3264", - "filename": "Heart-attack-stays", - "name": "Heart attack stays", + "description": "Pregnancies", + "download": "https://dasl.datadescription.com/download/data/3404", + "filename": "Pregnancies", + "name": "Pregnancies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A medical researcher measured the pulse rates (beats per minute) of a sample of randomly selected adults.", - "download": "https://dasl.datadescription.com/download/data/3413", - "filename": "Pulse-rates", - "name": "Pulse rates", + "description": "The Sleep Foundation (www.sleepfoundation.org) says that adults should get at least 7 hours of sleep each night. A survey of students at a small school in the northeast U.S. asked, among other things, “How much did you sleep last night?” The data are the responses. ", + "download": "https://dasl.datadescription.com/download/data/3453", + "filename": "Sleep", + "name": "Sleep", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], - "subcategory_name": "Cardiology" + "subcategory_name": "Common" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "How difficult is it to maintain your balance while concentrating? It is more difficult when you are older? Nine elderly (6 men and 3 women) and eight young men were subjects in an experiment. 
Each subject stood barefoot on a \"force platform\" and was asked to maintain a stable upright position and to react as quickly as possible to an unpredictable noise by pressing a hand held button. The noise came randomly and the subject concentrated on reacting as quickly as possible. The platform automatically measured how much each subject swayed in millimetres in both the forward/backward and the side-to-side directions.", - "download": "http://www.statsci.org/data/general/balaconc.txt", - "filename": "balaconc", - "name": "Maintaining Balance while Concentrating", + "description": "A pharmaceutical company tested three formulations of a pain relief medicine for migraine headache sufferers. For the experiment, 27 volunteers were selected and 9 were randomly assigned to one of three drug formulations. The subjects were instructed to take the drug during their next migraine headache episode and to report their pain on a scale […] ", + "download": "https://dasl.datadescription.com/download/data/3053", + "filename": "Analgesics", + "name": "Analgesics", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data comes from a small study in Western Australia of hypertension, alcohol, and obesity. This study was partly designed to mimic a previously reported U.S. study based on a larger sample. A log-linear interaction model is a convenient and effective way of investigating associations among the three variables. A prior-posterior analysis of this 3 x 2 x 4 contingency table using prior information from the previous study (Klatsky et al., 1977) may be appropriate. The previous study reported the general conclusion that alcohol intake and obesity were significantly and independently associated with hypertension (blood pressure). 
Although a few summary statistics were reported, the full data were not published. One difference between the two studies was in the definition of obesity categories.\nThe data is listed as follows: the first column (Obesity) contains a numerical value representing the level of obesity (1=low, 2=average, 3=high), the second column (BP) contains a numerical indicator of the presence of hypertension (0=no, 1 =yes). The next five columns are labelled with the levels of alcoholic intake of the subjects, in drinks per day. These columns contain the frequency of observations that have this level of intake, for each group of obesity level and hypertension presence.", - "download": "http://www.statsci.org/data/oz/alchyp.txt", - "filename": "alchyp", - "name": "Alcohol, Hypertension and Obesity", + "description": "A study compared the effectiveness of several antidepressants by examining the experiments in which they had passed the FDA requirements. Each of those experiments compared the active drug with a placebo, an inert pill given to some of the subjects. In each experiment some patients treated with the placebo had improved, a phenomenon called the […] ", + "download": "https://dasl.datadescription.com/download/data/3054", + "filename": "Antidepressants", + "name": "Antidepressants", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "FEV (forced expiratory volume) is an index of pulmonary function that measures the volume of air expelled after one second of constant effort. The data contains determinations of FEV on 654 children ages 6-22 who were seen in the Childhood Respiratory Desease Study in 1980 in East Boston, Massachusetts. The data are part of a larger study to follow the change in pulmonary function over time in children. 
\nID\n - \nID number\nAge\n - \nyears\nFEV\n - \nlitres\nHeight\n - \ninches\nSex\n - \nMale or Female\nSmoker\n - \nNon = nonsmoker, Current = current smoker\n", - "download": "http://www.statsci.org/data/general/fev.txt", - "filename": "fev", - "name": "Childhood Respiratory Disease", + "description": "A student investigated just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods—washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). Her experiment consisted of one experimental factor, the […] ", + "download": "https://dasl.datadescription.com/download/data/3561", + "filename": "Baterial-soap", + "name": "Baterial soap", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the results of an study aimed at reducing the risk of HIV infection among African-American adolescents. The subjects were 14-18 year old female and male adolescents in a Southern USA city. The study compared two interventions. The treatment intervention was an 8-week Behavioural Skills Training (BST) program. The control was an single 2-hour education session about HIV and AIDS. The subjects completed sexual attitude and activity questionnaires before and after the intervention and at 6-month and 12-month follow-ups. The data here are for 10 subjects for each intervention although the original study was much larger. The data given here appear to have been created by Howell (1999) based on summary statistics from the original study. The dependent variable is the logarithm-transformed frequency of condom-protected sex ( log(Y+1) ). 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nBST\n\n1 = BST intervention, 0 = control\n\nPre\n\nLog-frequency of protected sex before the intervention\n\nPost\n\nLog-frequency of protected sex after the intervention\n\nFU6\n\nLog-frequency of protected sex reported at the 6 months follow-up\n\nFU6\n\nLog-frequency of protected sex reported at the 12 months follow-up\n", - "download": "http://www.statsci.org/data/general/protsex.txt", - "filename": "protsex", - "name": "Behavioural Skills Training and Protected Sex", + "description": "Measurements of 250 men of various ages. The percent of a man’s body that is fat is a matter of concern for health and fitness. But the %bodyfat is difficult and expensive to measure accurately. These data offer correct %bodyfat measurements along with a variety of easier to find measures. Can you build a model ", + "download": "https://dasl.datadescription.com/download/data/30790", + "filename": "Bodyfat", + "name": "Bodyfat", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "HARVEST (Hypertension and Ambulatory Recording Venetia Study) is a trial designed to assess whether ambulatory monitoring adds something to office (clinical) blood pressure in predicting the development of fixed hypertension and of cardiovascular complications in patients with borderline to mild hypertension. Ambulatory monitoring refers to the measuring of home blood pressure by an annotated device that the subject wears for 24 hours. The data give information on 1100 subjects compiled by Dr Paolo Palatini, Professor of Clinical Medicine at the University of Padua, Italy. 
\nPatients were eligible for the study if they satisfied the following criteria: \ndiastolic blood pressure (BP) between 90 and 100 mm Hg or isolated systolic hypertension (systolic BP greater than or equal to 140 mm Hg and diastolic BP less than 90 mm Hg) \nnever been treated for hypertension \naged 18 to 45 years old \nfree from other important risk factors for atherosclerosis\nThe subjects were followed for 5 years. Baseline examinations, including ECG and echocardiography, were repeated at the end of the study or upon development of hypertension, defined as BP persistently 100 mm Hg or greater or a systolic BP of 160 mm Hg or greater. Ambulatory monitoring was repeated 3 months and 5 years after the baseline evaluation.\nThe symbol C or A after the name of a variable means:\nC = clinical examination; A = ambulatory (home monitoring)\nThe last symbol of a variable name may be B, 3, 5 or E:\nB = baseline examination\n3 = 3-month examination\n5 = 5-year examination\nE = endpoint examination \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSmoke\n\nSmoking status at baseline examination:\n0 = non-smoking,\n1 = 1-5 cigarettes per day,\n2 = 6-10 cigarettes per day,\n3 = 11-20 cigarettes per day.\n\nSport\n\nSport activity at baseline examination:\n0 = only sedentary,\n1 = light activity (walking),\n2 = sports non-competitive,\n3 = sports competitiv.\n\nSBP\n\nSystolic blood pressure\n\nDBP\n\nDiastolic blood pressure\n\nHR\n\nHeart rate\n\nAge\n\nAge in years\n\nBMI\n\nBody mass index: 100 * weight (kg) / height (m)2\n\nEndPoint\n\nEndpoint status at the time the file was created:\n1 = blood pressure level hypertensive\n0 = blood pressure level not hypertensive\n\nTime\n\nTime in months from baseline examination to the date of endpoint or to May 30, 1999, whichever was earlier\n\nMale\n\nGender:\n1 = male\n0 = female\n", - "download": "http://www.statsci.org/data/general/harvest.txt", - "filename": "harvest", - "name": "HARVEST Trial", + "description": "Burger King 
publishes full nutrition information on its menu. These data are for the foods on the menu recently. (Visit the site listed as the reference for the most current list.) ", + "download": "https://dasl.datadescription.com/download/data/3089", + "filename": "Burger-King-items", + "name": "Burger King items", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data is a subset from the Six Cities study, a longitudinal study of the health effects of air pollution. The data contain repeated binary measures of the wheezing status (1 = yes, 0 = no) for each of 537 children from Stuebenville, Ohio, at ages 7, 8, 9 and 10 years. Also measured is whether or not the mother was a smoker during the first year of the study.", - "download": "http://www.statsci.org/data/general/wheeze.txt", - "filename": "wheeze", - "name": "Child's Wheeze and Mother's Smoking", + "description": "Nutritionists are concerned that people have a good breakfast. But what does that mean? Students collected nutrition information from the nutrition labels of cereals in one supermarket. ", + "download": "https://dasl.datadescription.com/download/data/3107", + "filename": "Cereals", + "name": "Cereals", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The National Trachoma and Eye Health Program (1980) reports on the prevalance of otitis media (an infection that produces pus within the middle ear) in both aboriginal and non-aboriginal communities in Australia. The Program surveyed all aboriginal communities in Australia and attempted to contact all aborigines. 
Simultaneously, contact was made with non-aborigines usually living in the same or adjacent locations. Because of the high prevalence of infection in the aboriginal community only severe cases were classified as infected, virtually all of them suffering bursting of the ear drum and consequent scarring. It was thought that scarring could be used to identify those people who previously had had sever infections, but were not currently infected. So it was possible to classify subjects as (a) either not currently infected and no scarring, (b) currently infected or (c) not currently infected but one or more drums scarred. The data give the number of aborigines examined in various age intervals and the proportions classified as (a), (b) or (c). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nAge \n\nAge interval (years)\n\nExamined \n\nNumber of subjected examined\n\nNone \n\nProportion not currently infected and with no scarring\n\nCurrent \n\nProportion currently infected\n\nPast \n\nPropotion not currently infected but with one or both drums scarred\n", - "download": "http://www.statsci.org/data/oz/otitis.txt", - "filename": "otitis", - "name": "Prevalence of Otitis Media in Aboriginal Communities", + "description": "Researchers at the University of Denver Infant Study Center wondered whether temperature might influence the age at which babies learn to crawl. Perhaps the extra clothing that babies wear in cold weather would restrict movement and delay the age at which they started crawling. Data were collected on 208 boys and 206 girls. Parents reported the month of the baby’s birth and the age (in weeks) at which their child first crawled. 
The table gives the average Temperature (°F) when the babies were 6 months old and average Crawling Age (in weeks) for each month of the year.", + "download": "https://dasl.datadescription.com/download/data/3143", + "filename": "Crawling", + "name": "Crawling", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In a study of the effect of ticks on cattle in North Queensland, the disease status of animals exposed to the tick-borne parasite Anaplasma marginale is of some concern. A symptom of infection from this parasite, the number of red blood cells can be redu ced by up to 80% at the point of peak anaemia. The problem to be considered here concerns a way of quantifying the change in red blood cell populations during the recovery stages of the disease. \nIn a laboratory trial, cows were inoculated with the parasite and their red blood cells monitored before and after inoculation. The data collected were in the form of red cell volume distributions obtained from a Coulter counter, truncated and sorted into groups. In work as yet unpublished, McLaren et al. have addressed the problem of fitting distributions to similar data from humans suffering myelodysplastic anaemia, and McLaren (private communication) has suggested the need to develop hypothesis testing procedures for this type of data. \nThe observed counts of red cell volume from one of the cows on days 21 (Freq1) and 23 (Freq2) after inoculation are listed. The counts are grouped into 18 intervals of equal width of 7.2 fl. The first column (Group) lists the group number, the second (Vol) lists the truncated lower endpoint of the cell volume interval. The lower and upper truncation values for these red cell volume counts were 21.6 fl and 151.2 fl respectively. 
A cursory inspection of the two sets of observed frequency counts in histogram form on the logarithmic scale suggest that the red blood cell volume distribution is bimodal, at least at 21 days after inoculation. \n", - "download": "http://www.statsci.org/data/oz/rbcmix.txt", - "filename": "rbcmix", - "name": "Red Blood Cell Volume Data for Cows", + "description": "Life expectancy at birth, TVs per capita, and doctors per capita for countries of the world. Doctors predict life expectancy, but is that causal? TVs also predict life expectancy. ", + "download": "https://dasl.datadescription.com/download/data/3169", + "filename": "life-expectancy", + "name": "Doctors and life expectancy", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Does blood pressure, on average, change with age. The data here are two categorical variables: Blood pressure categorized as High, Normal, Low, and Age categorized as under 30, 30-49, and over 50", - "download": "https://dasl.datadescription.com/download/data/3077", - "filename": "Blood-Pressure", - "name": "Blood Pressure", + "description": "Fertility (births/woman) and Female life expectancy for 219 countries of the world. (Data is available on both variables for only 200). How is life expectancy related to fertility? 
Are there any outliers and, if so, what do they indicate?", + "download": "https://dasl.datadescription.com/download/data/3202", + "filename": "Fertility-and-life-expectancy-2014", + "name": "Fertility and life expectancy 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Thirteen overweight women volunteered for a study to determine whether eating specially prepared crackers before a meal could help them lose weight. The subjects were randomly assigned to eat crackers with different types of fiber (bran fiber, gum fiber, both, and a control cracker) and cycled through several of the cracker alternatives. Unfortunately, some of the women developed uncomfortable bloating and upset stomachs. Researchers suspected that some of the crackers might be at fault. The study was paid for by the manufacturers of the gum fiber, who hoped this would be a new diet tool. What would you recommend to them about the prospects for marketing their new diet cracker?", - "download": "https://dasl.datadescription.com/download/data/3163", - "filename": "Diet", - "name": "Diet", + "description": "Gosset says in his seminal 1908 paper: “Before I had succeeded in solving my problem analytically, I had endeavoured to do so empirically. The material used was a correlation table containing the height and left middle finger measurements of 3000 criminals, from a paper by W. R. MacDonell (Biometrika, Vol. I., p. 219).” His method was to write the 3000 finger length values on cards, shuffle them thoroughly, and then deal out 750 hands of 4 cards. For each hand he then calculated (with a mechanical calculator) the mean and standard deviation. (Note: he divided by n (= 4) and not by n-1 (= 3).) 
He then found values of ybar – the population mean (which he knew because he had the population; it is 11.5474) and divided each by the standard deviation. The resulting values formed the distribution which he then correctly described.\nThe finger measurements were originally given in mm and the heights in feet and inches. They have been converted to cm (at https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/crimtab.html). The midpoints of intervals are used where MacDonell gives a range of values.", + "download": "https://dasl.datadescription.com/download/data/3204", + "filename": "Fingers-and-Heights", + "name": "Fingers and Heights", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Medical researchers followed 6272 Swedish men for 30 years to see whether there\nwas any association between the amount of fish in their diet and prostate cancer. The original study actually used pairs of twins, which enabled the researchers to discern that the risk of cancer for those who never ate fish actually was substantially greater.", - "download": "https://dasl.datadescription.com/download/data/3207", - "filename": "Fish-diet", - "name": "Fish diet", + "description": "Is it true that students\ntend to gain weight during their first year in college? Cornell Professor of Nutrition David Levitsky recruited students from two large sections\nof an introductory health course. Although they were\nvolunteers, they appeared to match the rest of the freshman\nclass in terms of demographic variables such as sex\nand ethnicity. The students were weighed during the first\nweek of the semester, then again 12 weeks later. 
Based\non Professor Levitsky’s data, estimate the mean weight\ngain in first-semester freshmen and comment on the\n“freshman 15.” (Weights are in pounds.)", + "download": "https://dasl.datadescription.com/download/data/3218", + "filename": "Freshman-15", + "name": "Freshman 15", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A student decided to investigate just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods—washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). Her experiment consisted of one experimental factor, the washing Method, at four levels.\nShe suspected that the number of bacteria on her hands before washing might vary considerably from day to day. To help even out the effects of those changes, she generated random numbers to determine the order of the four treatments. Each morning, she washed her hands according to the treatment randomly chosen. Then she placed her right hand on a sterile media plate designed to encourage bacteria growth. She incubated each plate for 2 days at 36°C, after which she counted the bacteria colonies. She replicated this procedure 8 times for each of the four treatments.", - "download": "https://dasl.datadescription.com/download/data/3254", - "filename": "Hand-washing", - "name": "Hand washing", + "description": "For humans, pregnancy lasts about 280 days. In other species of animals, the length of time from conception to birth varies. Is there any evidence that the gestation period is related to the animal’s life span? 
The data give Gestation Period (in days) and Life Expectancy (in years) for 18 species of mammals.", + "download": "https://dasl.datadescription.com/download/data/3241", + "filename": "Gestation_", + "name": "Gestation", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The heights and weights of students in a statistics class were recorded. ", - "download": "https://dasl.datadescription.com/download/data/3265", - "filename": "Heights-weights", - "name": "Heights and weights", + "description": "Physical therapists measure a patient’s manual dexterity with a simple task. The patient\npicks up small cylinders from a 4 * 4 frame with one hand, flips them over (still with one\nhand), and replaces them in the frame. The task is timed for all 16 cylinders. The tool was originally normed for adults. 
In a follow-up study, researchers\nused this tool to study how dexterity improves with age in children and establish norms against which to compare a patient’s dexterity.", + "download": "https://dasl.datadescription.com/download/data/3253", + "filename": "Hand-dexterity", + "name": "Hand dexterity", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Canadian researcher John Coates took saliva samples in\nthe morning, twice a day for eight days, from 17 men working on a London\nmid-size trading f loor (trading a wide range of assets, with largest exposure to\nGerman interest rate futures), in June 2005, and classified each trader according\nto whether his testosterone level was high or low on that day (compared\nwith the trader’s median over the period). High testosterone days differed from\ntrader to trader, and high days differed from low days on average by 25% in\ntestosterone level. He also recorded the profits or losses (P&L) in pounds sterling\nof each trader during 11 am–4 pm daily.", - "download": "https://dasl.datadescription.com/download/data/3272", - "filename": "Hormones", - "name": "Hormones", + "description": "Fitting someone for a hearing aid requires assessing the patient’s hearing ability. In one method of assessment, the patient listens to a tape of 50 English words. The tape is played at low volume, and the patient is asked to repeat the words. The patient’s hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. 
Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", + "download": "https://dasl.datadescription.com/download/data/3261", + "filename": "Hearing", + "name": "Hearing", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Since the 1960s, the Centers for Disease Control and Prevention’s National Center for Health Statistics has been collecting health and nutritional information on people of all ages and backgrounds. The National Health and Nutrition Examination Survey (NHANES) of 2001–2002, measured a wide variety of variables, including body measurements, cardiovascular fitness, blood chemistry, and demographic information on more than 11,000 individuals.\nThe file holds data on the weights of 80 men between 19 and 24 years old of average height (between 5′8″ and 5′10″ tall).", - "download": "https://dasl.datadescription.com/download/data/3337", - "filename": "Mens-Weights", - "name": "Mens Weights", + "description": "Fitting someone for a hearing aid requires assessing the patient’s hearing ability. In one method of assessment, the patient listens to a tape of 50 English words. The tape is played at low volume, and the patient is asked to repeat the words. The patient’s hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. 
These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", + "download": "https://dasl.datadescription.com/download/data/3262", + "filename": "Hearing-4-lists", + "name": "Hearing 4 lists", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In 1879, A. A. Michelson made 100 determinations of the velocity\nof light in air using a modification of a method proposed by the French\nphysicist Foucault. The data are given here as reported by Stigler.\nThe measurements are derived from sets of often widely disparate\nnumbers of observations. The numbers are in km/sec, and have had\n299,000 subtracted from them. The currently accepted “true”\nvelocity of light in vacuum is 299,792.5 km/sec. Stigler has\napplied the corrections used by Michelson and reports that the\n“true” value appropriate for comparison to these measurements\nis 734.5. Each trial may be a summary of several experimental\nobservations.", - "download": "https://dasl.datadescription.com/download/data/3338", - "filename": "Michelson_", - "name": "Michelson", + "description": "The data hold measurements on people of various ages. The main variable of interest is the level of insulin-like growth factor (igƒ) (J. Clin. Endocrinol. Metab. 78(3): 744–752, March 1994). Each row in the data set corresponds to one individual. 
See also Igf13, which concentrates on children. ", + "download": "https://dasl.datadescription.com/download/data/3562", + "filename": "Igf", + "name": "Igf", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The National Health and Nutrition Examination Survey (NHANES) is a program of studies designed to assess the health and nutritional status of adults and children in the United States. The survey is unique in that it combines interviews and physical examinations. ", - "download": "https://dasl.datadescription.com/download/data/3365", - "filename": "NHANES", - "name": "NHANES", + "description": "Measurements on children under 13 years of age. Most of the data was collected from physical examinations in schools. The main variable of interest is the level of insulin-like growth factor (igƒ) (J. Clin. Endocrinol. Metab. 78(3): 744–752, March 1994). Each row in the data set corresponds to one individual. See also the dataset Igf, which includes adults.", + "download": "https://dasl.datadescription.com/download/data/3563", + "filename": "Igf13", + "name": "Igf13", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Body temperatures of a random sample of 52 healthy adults, reported in degrees Fahrenheit. ", - "download": "https://dasl.datadescription.com/download/data/3368", - "filename": "Normal-temperature", - "name": "Normal temperature", + "description": "Homer’s Iliad is an epic poem, compiled around 800 BCE, that describes several weeks of the last year of the 10-year siege of Troy (Ilion) by the Achaeans. 
The story centers on the rage of the great warrior Achilles. But it includes many details of injuries and outcomes, and is thus the oldest record of Greek medicine. The data report 146 recorded injuries for which both injury site and outcome are provided in the Iliad. Are some kinds of injuries more lethal than others?", + "download": "https://dasl.datadescription.com/download/data/3281", + "filename": "Illiad-Injuries", + "name": "Illiad Injuries", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Obesity and exercise", - "download": "https://dasl.datadescription.com/download/data/3372", - "filename": "Obesity-and-exercise", - "name": "Obesity and exercise", + "description": "In 1974, the Bellevue-Stratford Hotel in Philadelphia was the scene of an outbreak of\nwhat later became known as legionnaires’ disease. The cause of the disease was finally discovered to be bacteria that thrived in the air-conditioning units of the hotel.\nOwners of the Rip Van Winkle Motel, hearing about the Bellevue-Stratford, replaced their air-conditioning system. The data are the bacteria counts in the air of eight rooms, before and after a new air-conditioning system was installed (measured in colonies per cubic foot of air). Has the new system succeeded in lowering the bacterial count?", + "download": "https://dasl.datadescription.com/download/data/3310", + "filename": "Legionnaires-disease", + "name": "Legionnaires disease", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Story: \nThe Pima Indians of southern Arizona are a unique community. 
Their ancestors were among the first people to cross over into the Americas some 30,000 years ago. For at least two millennia, they have lived in the Sonoran Desert near the Gila River. Known throughout history as a generous people, they have given of themselves for the past 30 years helping researchers at the National Institutes of Health study certain diseases like diabetes and obe-sity. Young Pima Indians often marry other Pimas, making them an ideal group for genetic researchers to study. Pimas also have an extremely high incidence of diabetes.\nResearchers investigating factors for increased risk of diabetes examined data on 768 adult women of Pima Indian heritage. One possible predictor is the body mass index, BMI, calculated as weight/height2, where weight is measured in kilograms and height in meters. We are interested in the relationship between BMI and the incidence of diabetes. ", - "download": "https://dasl.datadescription.com/download/data/3394", - "filename": "Pima-indians", - "name": "Pima indians", + "description": "In 2015 the Council of Europe published a report entitled The European School Survey Project on Alcohol and Other Drugs (www.espad.org). Among other issues, the survey investigated the percentages of 16-year-olds who had used marijuana. The data are the results for 38 European countries. ", + "download": "https://dasl.datadescription.com/download/data/3326", + "filename": "Marijuana-2015", + "name": "Marijuana 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Pregnancies", - "download": "https://dasl.datadescription.com/download/data/3404", - "filename": "Pregnancies", - "name": "Pregnancies", + "description": "Researchers in Food Science studied how big people’s mouths tend to be. 
They measured mouth volume by pouring water into the mouths of subjects who lay on their backs. Unless this is your idea of a good time, it would be helpful to have a model to estimate mouth volume more simply. Fortunately, mouth volume is related to height. (Mouth volume is measured in cubic centimeters and height in meters.)", + "download": "https://dasl.datadescription.com/download/data/3345", + "filename": "Mouth-volume", + "name": "Mouth volume", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Sleep Foundation (www.sleepfoundation.org) says that adults should get at least 7 hours of sleep each night. A survey of students at a small school in the northeast U.S. asked, among other things, “How much did you sleep last night?” The data are the responses. ", - "download": "https://dasl.datadescription.com/download/data/3453", - "filename": "Sleep", - "name": "Sleep", + "description": "A hospital in Nashville is considering changes to the prenatal care they offer. They collected the gestation times of 70 pregnancies that ended in live births. The established human gestation time is 266 days. 
", + "download": "https://dasl.datadescription.com/download/data/3359", + "filename": "Nashville", + "name": "Nashville", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Time of Birth, Sex, and Birth Weight of 44 Babies", - "download": "http://jse.amstat.org/datasets/babyboom.dat.txt", - "filename": "babyboom", - "name": "babyboom", + "description": "Neck size", + "download": "https://dasl.datadescription.com/download/data/3360", + "filename": "Neck-size", + "name": "Neck size", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This dataset contains 21 body dimension measurements as well as age, \nweight, height, and gender on 507 individuals. 
The 247 men and 260 \nwomen were primarily individuals in their twenties and thirties, with a \nscattering of older men and women, all exercising several hours a week.", - "download": "http://jse.amstat.org/datasets/body.dat.txt", - "filename": "Body", - "name": "Exploring Relationships in Body Dimensions", + "description": "Paralyzed veterans", + "download": "https://dasl.datadescription.com/download/data/3388", + "filename": "Paralyzed-veterans", + "name": "Paralyzed veterans", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Each record contains the results of a laboratory analysis of calcium, \ninorganic phosphorous, and alkaline phosphatase. The variable cammol \nis measured as millimoles per liter. Phosmol is inorganic phosphorous \nin millimoles per liter. Alkphos is meauring alkaline phosphatase in \ninternational units per liter. The purpose of the study was to \ndetermine if significant gender differences exist in the mean values \nof calcium, inorganic phosphorus, and alkaline phosphatase in \nsubjects over age 65. A second purpose was to determine if analytical \nvariation between laboratoreis would affect the mean values of the study variables. \nCalcium.dat contains incorrect records that have transcription errors. Calciumgood.dat \ncontains the corrected values. ", - "download": "http://jse.amstat.org/datasets/calcium.dat.txt", - "filename": "Calcium", - "name": " Calcium, inorganic phosphorus and alkaline phosphatase levels in elderly patients ", + "description": "The Paralyzed Veterans of America (PVA) is a Congressionally chartered veterans’ service organization that represents the interests of paralyzed veterans. 
The agency provides a range of services to veterans who have spinal cord injury or dysfunction. It derives most of its funding from contributions. The data set PVA contains a sample of the data on donors who recently gave money to the organization.", + "download": "https://dasl.datadescription.com/download/data/3415", + "filename": "PVA", + "name": "PVA", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Percentage of body fat, age, weight, height, and ten body circumference\nmeasurements (e.g., abdomen) are recorded for 252 men. Body fat, a\nmeasure of health, is estimated through an underwater weighing\ntechnique. Fitting body fat to the other measurements using multiple\nregression provides a convenient way of estimating body fat for men\nusing only a scale and a measuring tape.", - "download": "http://jse.amstat.org/datasets/fat.dat.txt", - "filename": "fat", - "name": "Fitting Percentage of Body Fat to Simple Body Measurements", + "description": "People with spinal cord injuries may lose function in some, but not all, of their muscles. The ability to push oneself up is particularly important for shifting position when seated and for transferring into and out of wheelchairs. Surgeons compared two operations to restore the ability to push up in children. 
", + "download": "https://dasl.datadescription.com/download/data/3479", + "filename": "Tendon-transf", + "name": "Tendon transfers", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Other" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Sample of 654 youths, aged 3 to 19, in the area of East Boston\nduring middle to late 1970's. Interest concerns the relationship\nbetween smoking and FEV. Since the study is necessarily\nobservational, statistical adjustment via regression models\nclarifies the relationship.", - "download": "http://jse.amstat.org/datasets/fev.dat.txt", - "filename": "fev_", - "name": "Forced Expiratory Volume (FEV) Data", + "description": " In a random sample of U.S. adults surveyed in December 2011, Pew Research asked how important it is “to you personally” to be successful in a high-paying career or profession. Responses are recorded by sex and age. ", + "download": "https://dasl.datadescription.com/download/data/3071", + "filename": "Being-successful", + "name": "Being successful", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The tab-delimited data set gives characteristics of young female patients between\nthe ages of 11 to 26 who came to clinics of Johns Hopkins Medical Institutions between\n2006 and 2008 to begin the three-shot regimen of vaccinations with the anti-human\npapillomavirus (HPV) medication Gardasil. 
", - "download": "http://jse.amstat.org/v19n1/gardasil.dat.txt", - "filename": "gardasil", - "name": "Retrospective Study (Potential Predictors for Completion or Non-Completion of ", + "description": "A researcher at Cornell University wanted to know how friendship might affect simple sales such as this. She randomly divided subjects into two groups and gave each group descriptions of items they might want to buy. One group was told to imagine buying from a friend whom they expected to see again. The other group […] ", + "download": "https://dasl.datadescription.com/download/data/3090", + "filename": "Buy-from-a-friend", + "name": "Buy from a friend", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "From a very young age, shoes for boys tend to be wider than shoes for \ngirls. Is this because boys have wider feet, or because it is assumed that \ngirls, even in elementary school, are willing to sacrifice comfort for fashion? \nTo assess the former, a statistician measures kids' feet. Methods for analysis include \nt-tests, ANCOVA, and least-squares model building. This data set is useful for \ndiscussion of covariates, confounding, and conclusions in the context of the problem.", - "download": "http://jse.amstat.org/datasets/kidsfeet.dat.txt", - "filename": "kidsfeet", - "name": "Foot measurements for fourth grade children", + "description": "The September 1998 issue of the American\nPsychologist published an article by Kraut et al. 
that\nreported on an experiment examining “the social and\npsychological impact of the Internet on 169 people in\n73 households during their first 1 to 2 years online.” In the\nexperiment, 73 households were offered free Internet access\nfor 1 or 2 years in return for allowing their time and activity\nonline to be tracked. The members of the households who\nparticipated in the study were also given a battery of tests\nat the beginning and again at the end of the study. The\nconclusion of the study made news headlines: Those who\nspent more time online tended to be more depressed at the\nend of the experiment.\nThe news reports about this study clearly concluded that\nusing the Internet causes depression. Is such a conclusion warranted?", + "download": "https://dasl.datadescription.com/download/data/3158", + "filename": "Depression-and-the-internet", + "name": "Depression and the internet", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This article takes data from a paper in the _Journal of the American\nMedical Association_ that examined whether the true mean body\ntemperature is 98.6 degrees Fahrenheit. Because the dataset suggests\nthat the true mean is approximately 98.2, it helps students to grasp\nconcepts about true means, confidence intervals, and t-statistics.\nStudents can use a t-test to test for sex differences in body\ntemperature and regression to investigate the relationship between\ntemperature and heart rate.", - "download": "http://jse.amstat.org/datasets/normtemp.dat.txt", - "filename": "normtemp", - "name": "Normal Body Temperature, Gender, and Heart Rate ", + "description": "A Harvard psychologist recruited 75 female hotel maids to participate in a study. 
She randomly selected 41 of them, whom she informed (truthfully) that the work they do satisfies the Surgeon General’s recommendations for an active lifestyle, providing examples to show that their work is good exercise. The other 34 were told nothing. Various characteristics, such as weight, body fat, body mass index and blood pressure were recorded at the start of the study and again after four weeks. The researcher was interested in whether the information she provided would result in measurable physical changes. If there is a difference, it might challenge our understanding of the placebo effect because being informed could make a difference.", + "download": "https://dasl.datadescription.com/download/data/3273", + "filename": "Hotel-maids", + "name": "Hotel maids", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Common" - }, - { - "datasets": [ + "use_first_row_for_vectorname": true + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The effect of a single 600 mg dose of absorbic acid versus a sugar placebo on the muscular endurance (as measured by repetitive grip strength trials) of fifteen male volunteers (19-23 years old) was evaluated. The study was conducted in a double-blind manner with crossover. \nThree initial maximal contractions were performed for each subject, with the greatest value indicating maximal grip strength. Muscular endurance was measured by having the subjects squeeze the dynamometer, hold the contraction for three seconds, and repeat continuously until a value of 50% maximum grip strength was achieved for three consecutive contractions. Endurance time was defined as the number of repetitions required to go from maximum grip strength to the initial 50% value. 
Subjects were given frequent positive verbal encouragement in an effort to have them complete as many repetitions as possible. ", - "download": "http://www.statsci.org/data/general/vitaminc.txt", - "filename": "vitaminc", - "name": "Effect of Vitamin C on Muscular Endurance", + "description": "In an experiment to test ginkgo biloba, subjects were assigned randomly to take ginkgo biloba supplements or a placebo. Their memory was tested to see whether it improved. ", + "download": "https://dasl.datadescription.com/download/data/3335", + "filename": "Memory", + "name": "Memory", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Proponents of Reiki, a type of touch therapy, hypothesize that Reiki re-establishes the energy balance in areas of the body experiencing disease and discomfort, thus promoting healing, reducing pain and increasing quality of life. The main feature that distinguishes Reiki from other couch therapies, such as therapeutic touch, is that Reiki therapists have physical contact with the body. Participants in Reiki are fully-clothed and may be covered with a blanket if they wish. The treatment, delivered to 18 specific areas of the body, begins with the participant lying on his or her back. The hands are placed on 10 distinct locations on the head and torso. The participant is then asked to lie on his or her stomach (or side, if this is more comfortable), where the hands are placed on 8 additional distinct locations covering the back, hip area and feet. The treatment takes approximately 1.25 hours to complete. \nTreatment of cancer pain usually focuses on opioids. 
Since high doses of opioids frequently aggravate other common symptoms of cancer patients, it is of interest to explore non-drug treatments that may allow control of cancer pain with lower doses of opioids. This project studied whether Reiki is beneficial in the management of pain for people from the community experiencing general chronic pain, as a prelimary step in deciding whether Reiki is worth trying for cancer patients. \nThe Sample \nThe eligibility criteria were that subjects must be at least 18 years old, not receiving chemotherapy or radiotherapy, be experiencing moderate pain (at least 3 on a VAS (0-10) or 2 on a Likert scale (0-5)), have normal cognitive function, be able to speak, read and write English, and be willing to complete the study rating scales. The sample size necessary was calculated using the binomial distribution with the assumption that 50% of the study participants might be expected to benefit from treatment. The probability of a decrease in pain following treatment in 14 or more cases out of 20 by chance alone is 0.058. [VAS means \"Visual Analogue Scale\". A Likert-type item consists of a single statement, followed by a usually five or six-point choice with each choice described in words.] \nNotices were placed in retail establishments and community centres. Potential participans identified themselves by telephoning the research assistant at a number provided on the recruitment posters. Individuals who met the eligibility criteria and who signed a consent form were scheduled to receive a treatment by a Reiki therapist. \nTwenty People were recruited (18 women and 2 men) who ranged in age from 23 to 62 years (mean 44 years). These participants were currently experiencing pain at 55 sites. Ten participants had pain in their upper body and 4 in their lower body. The remaining 6 participants had pain in both the upper and lower parts of their body. 
Eight participants attributed their pain to bone and muscle problems and 5 participants to chronic illness. Three of the participants included in the chronic illness group had cancer. Six participants had been experiencing pain for 1 year or less, and 7 had been experiencing pain for more than 1 year, up through 7 years. The remaining seven had been in pain for more than 7 years, one for 48 years. \nEighteen participants had asked their physician for help with their pain, and 19 were currently using at least 1 of the following strategies to manage it: analgesic preparations, anti-inflammatory medications, exercise, massage, acupuncture, therapeutic touch, chiropractic, homeopathy, meditation, vitamins, steam, muscle relaxation techniques and Tai Chi. \nTreatment and Data Collection \nParticipants were given 1 treatment by the Reiki therapist in her office. They lay on a massage table fully clothed and, if desired, were also covered with a sheet or blanket. The lights were dimmed, and a candle was lit; soft music played in the background. The environment was consistent through all 20 treatments. A pain VAS ranging from 0 to 10 and a Likert scale ranging from 0 to 5 were completed immediately before and after the Reiki treatment. ", - "download": "http://www.statsci.org/data/general/reiki.txt", - "filename": "reiki", - "name": "Using Reiki to Manage Pain", + "description": "The New York Times combined survey data (economix.blogs.nytimes.com/2013/\n07/10/working-parents-wanting-fewer-hours/) with data from\nthe U.S. Bureau of Labor Statistics (BLS) (www.bls.gov/news\n.release/archives/famee_04262013.htm) comparing how mothers\nand fathers would like to allocate their time compared with\nwhat they actually do. 
They asked a sample of parents with\nchildren 18 or under:\n“If money were no object, and you were free to do whatever\nyou wanted, would you stay at home, would you work full\ntime, or would you work part time?”\nPercent of respondents to this question choosing each\nalternative are reported in the “Desire” columns of the table.\nData in the “Actual” column are from the BLS. (Note:\n“Unemployed” = unemployed and actively seeking work.)\nThe table reports column percents (which may not add to\n100% due to rounding)", + "download": "https://dasl.datadescription.com/download/data/3344", + "filename": "Mothers-fathers-aspirations", + "name": "Mothers and fathers aspirations", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data consist of measurements (x1, x2, Age in months) on 23 babies, collected in the Faculty of Medicine at the University of Hong Kong. It would be of great medical interest to find a relationship between x1 and x2. However, any correlation between them is likely spurious because both x1 and x2 tend to increase with age. See Chris Lloyd's original mailing to the ANZStat mailing list discussion.", - "download": "http://www.statsci.org/data/general/babies.txt", - "filename": "babies", - "name": "Measurements on Babies", + "description": "In a study published in the journal Psychological Science, Rauscher, Shaw, and Ky reported that when students were given a spatial reasoning section of a standard IQ test, those who listened to Mozart for 10 minutes improved their scores more than those who simply sat quietly. 
", + "download": "https://dasl.datadescription.com/download/data/3350", + "filename": "Mozart", + "name": "Mozart", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "When anthropologists analyze human skeletal remains, an important piece of information is living stature. Since skeletons are commonly based on statistical methods that utilize measurements on small bones. The following data was presented in a paper in the American Journal of Physical Anthropology to validate one such method. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMetaCarp\n\nMetacarpal bone I length in cm\n\nStature\n\nStature in cm\n\n\n\n", - "download": "http://www.statsci.org/data/general/stature.txt", - "filename": "stature", - "name": "Prediction of Height from Metacarpal Bone Length", + "description": "Researchers interviewed participants to find some who reliably fell asleep and awoke on one side and who could remember their dreams. They found 63 participants, of whom 41 were right-side sleepers and 22 slept on their left side. Then they interviewed them about their dreams. Of the 41 right-side sleepers, only 6 reported often having nightmares. But of the 22 left-side sleepers 9 reported nightmares. Is the difference significant?", + "download": "https://dasl.datadescription.com/download/data/3366", + "filename": "Nightmares", + "name": "Nightmares", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "CPK (creatine phosphokinase) is a enzyme contained within muscle cells which is necessary for the storage and release of energy. 
It can be released into the blood in response to vigorous exercise from damaged (leaky) muscle cells. This occurs often even in healthy athletes. \nThis study intestigated the metabolic effect of cross-country skiing. Subjects were participants in a 24 hour cross-country relay. Age, weight (kg) and blood CPK concentration 12 hours into the relay were recorded.", - "download": "http://www.statsci.org/data/general/bloodcpk.txt", - "filename": "bloodcpk", - "name": "Blood CPK in Cross-Country Skiers", + "description": "Stereograms appear to be composed entirely of\nrandom dots. However, they contain separate images that a\nviewer can “fuse” into a three-dimensional (3D) image by staring\nat the dots while defocusing the eyes. An experiment was\nperformed to determine whether knowledge of the embedded\nimage affected the time required for subjects to fuse the images.\nOne group of subjects (group NV) received no information or\njust verbal information about the shape of the embedded object.\nA second group (group VV) received both verbal information\nand visual information (specifically, a drawing of the object).\nThe experimenters measured how many seconds it took for the\nsubject to report that he or she saw the 3D image.", + "download": "https://dasl.datadescription.com/download/data/3459", + "filename": "Stereograms", + "name": "Stereograms", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Psychology" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Studies conducted at the University of Melbourne indicate that there may be a difference between the pain thresholds of blonds and brunettes. Men and women of various ages were divided into four categories according to hair colour: light blond, dark blond, light brunette, and dark brunette. 
The purpose of the experiment was to determine whether hair colour is related to the amount of pain produced by common types of mishaps and assorted types of trauma. Each person in the experiment was given a pain threshold score based on his or her performance in a pain sensitivity test (the higher the score, the higher the person’s pain tolerance). \n\nVariable\n\nValues\n\nHairColour\n\nLightBlond, DarkBlond, LightBrunette or DarkBrunette \n\nPain\n\nPain theshold score \n", - "download": "http://www.statsci.org/data/oz/blonds.txt", - "filename": "blonds", - "name": "Pain Thresholds of Blonds and Brunettes", + "description": "A study that examined the health risks of smoking measured the cholesterol levels of people who had smoked for at least 25 years and people of similar ages who had smoked for no more than 5 years and then stopped.", + "download": "https://dasl.datadescription.com/download/data/3111", + "filename": "Cholesterol-and-smoking", + "name": "Cholesterol and smoking", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "For his MS305 data project, Michael Larner measured the weight and various physical measurements for 22 male subjects aged 16 - 30. Subjects were randomly chosen volunteers, all in reasonable good health. Subjects were requested to slightly tense each muscle being measured to ensure measurement consistency. Apart from Mass, all measurements are in cm. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMass\n\nWeight in kg\n\nFore\n\nMaximum circumference of forearm\n\nBicep\n\nMaximum circumference of bicep\n\nChest\n\nDistance around chest directly under the armpits\n\nNeck\n\nDistance around neck, approximately halfway up\n\nWaist\n\nDistance around waist, approximately trouser line\n\nThigh\n\nCircumference of thigh, measured halfway between the knee and the top of the leg\n\nCalf\n\nMaximum circumference of calf\n\nHeight\n\nHeight from top to toe\n\nShoulders\n\nDistance around shoulders, measured around the peak of the shoulder blades\n", - "download": "http://www.statsci.org/data/oz/physical.txt", - "filename": "physical", - "name": "Mass and Physical Measurements for Male Subjects", + "description": "Data on 816 brands of cigarettes. What relationships are there among the nicotine content, tars, and CO? Are any brands unusually high or low in nicotine? Can you account for that? ", + "download": "https://dasl.datadescription.com/download/data/3113", + "filename": "Cigarettes", + "name": "Cigarettes", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Larsen and Marx (1986) write \nIn folklore, the full moon is often portrayed as something sinister, a kind of evil force possessing the power to control our behaviour. Over the centuries, many prominent writers and philosophers have shared this belief. Milton, in Paradise Lost, refers to \nDemoniac frenzy, moping melancholy\nAnd moon-struck madness. \nAnd Othello, after the murder of Desdemona, laments \nIt is the very error of the moon\nShe comes more near the earth than she was want\nAnd makes men mad. \nOn a more scholarly level, Sir William Blackstone, the renowned eighteenth centure English barrister, defined a \"lunatic\" as \none who hath ... 
lost the use of his reason and who hath lucid intervals, sometimes enjoying his senses and sometimes not, and that frequently depending upon changes of the moon. \nThe data give the admission rates to the emergency room of a Virginia mental health clinic before, during and after the 12 full moons from August 1971 to July 1972. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMonth\n\nMonth of year: Aug, Sep, ... Jul\n\nMoon\n\nBefore, During or After the full moon\n\nAdmission\n\nAdmission rate (patients/day)\n\n\n\n", - "download": "http://www.statsci.org/data/general/fullmoon.txt", - "filename": "fullmoon", - "name": "Mental Hospital Admissions During Full Moons", + "description": "Researchers measured the concentration (nanograms per milliliter) of cotinine in the blood\nof three groups of people: nonsmokers who have not been exposed to smoke, nonsmokers\nwho have been Exposed To Smoke (ETS), and smokers. Cotinine is left in the blood when\nthe body metabolizes nicotine, so its value is a direct measurement of the effect of passive smoke exposure.", + "download": "https://dasl.datadescription.com/download/data/3389", + "filename": "Passive-smoke", + "name": "Passive smoke", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Five types of electrodes were applied to the arms of 16 subjects and the resistance measured. The experiment was designed to see whether all five electrode types performed similarly. \nAfter obtaining the results, the experimenters decided that the reason for the two large readings on subject 15 was the excessive amount of hair of those parts of the subject's arm. They concluded that this subject's data should be deleted. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nSubject number\n\nE1\n\nResistance measured by electrode type 1\n\nE2\n\nResistance measured by electrode type 2\n\nE3\n\nResistance measured by electrode type 3\n\nE4\n\nResistance measured by electrode type 4\n\nE5\n\nResistance measured by electrode type 5\n", - "download": "http://www.statsci.org/data/general/resist.txt", - "filename": "resist", - "name": "Skin Resistance", + "description": "The Centers for Disease Control and Prevention\ntrack cigarette smoking in the United States. How has the percentage of people who smoke changed since the danger became clear during the last half of the 20th\ncentury? The data give percentages of smokers among\nmen 18–24 years of age, as estimated by surveys, from 1965\nthrough 2014.", + "download": "https://dasl.datadescription.com/download/data/3455", + "filename": "Smoking-2014", + "name": "Smoking 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Osteoarthritis is a mechanical degeneration of joint surfaces causing pain, swelling and loss of joint function in one or more joints. Physiotherapists treat the affected joints to reduce pain (VAS = visual analogue scale) and to increase the range of movement (ROM). In this study there were 10 subjects, each of whom was treated with continuous TENS (electric nerve stimulation) and short wave diathermy. Measurements were taken also after no treatment. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nSubject\n\nSubject identifier\n\nNoROM\n\nROM after no treatment\n\nNoVAS\n\nVAS after no treatment\n\nTENSROM\n\nROM after continuous TENS\n\nTENSVAS\n\nVAS after continuous TENS\n\nSWDROM\n\nROM after short wave diathermy\n\nSWDVAS\n\nVAS after short wave diathermy\n", - "download": "http://www.statsci.org/data/oz/oa.txt", - "filename": "oa", - "name": "Treatment for Osteoarthritis", + "description": "There has been a steady decline in the percentage of pregnant mothers who smoke. These data document the trend. They run only until 2011, which appears to be the latest date for which the CDC has data. ", + "download": "https://dasl.datadescription.com/download/data/3456", + "filename": "Smoking-and-Pregnancy-2011", + "name": "Smoking and Pregnancy 2011", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Smoking" + } + ] + }, + { + "category_name": "Nature", + "subcategories": [ + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The balance of subjects were observed for two different surfaces and for restricted and unrestricted vision. Balance was assessed qualitatively on an ordinal 4-point scale based on observation by the experimenter. Subjects were expected to be better balanced (show less sway) when standing on the normal surface than on foam, and when their eyes were open rather than closed or when their vision was restricted by a dome. \nEqual numbers of male and female subjects were chosen. For both males and females, ten older (more than 24 years old) and ten younger subjects were selected. \nThe data is available in two formats. The is in univariate or \"strung out form\" which is suitable for entry to Minitab or S-Plus and to most mixed model programs. 
The second is in repeated measures format which is suitable for SPSS and for most special purpose repeated measures programs. \nUnivariate format: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1 to 40\n\nSex\n\nmale or female\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight in cm\n\nWeight\n\nWeight in kg\n\nSurface\n\nnormal or foam\n\nVision\n\neyes open, eyes closed, or closed dome\n\nCTSIB\n\nQualitive measure of balance, 1 (stable) - 4 (unstable) \n\n\n\n", - "download": "http://www.statsci.org/data/oz/ctsibuni.txt", - "filename": "ctsibuni", - "name": "Effect of Surface and Vision on Balance", + "description": "Froliger and Kane measured the pH (a scale on which a value of 7 is neutral and values below 7 are acidic) of water collected from precipitation events in Allegheny County, Pennsylvania between December 20, 1973 and May 23, 1974. Display the distribution of these values and describe with words and numbers what you see. ", + "download": "https://dasl.datadescription.com/download/data/3041", + "filename": "acid-rain", + "name": "Acid rain", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data related to the transport of sulfite ions from blood cells suspended in a salt solution. The chloride concentration (%) was measured over a period of about 8 minutes as a continuous curve generated from electrical potentials. The data given here were digitized from the curve at 10 second intervals. 
\nThe theory of ion transport suggested that the concentration asymptote exponentially, i.e., \nChloride = q1{1 - q2exp(- q3Time)} \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTime\n\nElapsed time in minutes\n\nChloride\n\nChloride concentration (%)\n", - "download": "http://www.statsci.org/data/general/chloride.txt", - "filename": "chloride", - "name": "Transport of Sulfite Ions from Blood Cells", + "description": "The data give the average January Temperature (in degrees Fahrenheit) and Latitude (in degrees north of the equator) for 59 U.S. cities. How are they related? ", + "download": "https://dasl.datadescription.com/download/data/3114", + "filename": "City-climate", + "name": "City climate", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data are the times, in days, that heroin addicts spend in a clinic. There are two clinics and the covariates are believed to affect the times spent in the clinic by addicts. 
\n \nVariable\n \nDescription\n\nClinic\n\n1 or 2\n\nStatus\n\n0 = still in clinic at end of study (censored) or 1 = departed from clinic\n\nTime\n\ndays spent in clinic\n\nPrison\n\n1 = prison record or 0 = no record\n\nDose\n\nmethadone dosage (mg/day)\n", - "download": "http://www.statsci.org/data/oz/heroin.txt", - "filename": "heroin", - "name": "Methadone Treatment of Heroin Addicts", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3115", + "filename": "City-temperatures", + "name": "City temperatures", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A pharmaceutical company tested three formulations of a pain relief medicine for migraine headache sufferers. For the experiment, 27 volunteers were selected and 9 were randomly assigned to one of three drug formulations. The subjects were instructed to take the drug during their next migraine headache episode and to report their pain on a scale […] ", - "download": "https://dasl.datadescription.com/download/data/3053", - "filename": "Analgesics", - "name": "Analgesics", + "description": "Global temperature from https://www.ncdc.noaa.gov/cag/data-info/global Global temperature anomaly data come from the Global Historical Climatology Network-Monthly (GHCN-M) data set and International Comprehensive Ocean-Atmosphere Data Set (ICOADS), which have data from 1880 to the present. These two datasets are blended into a single product to produce the combined global land and ocean temperature anomalies. The available timeseries of global-scale temperature anomalies are calculated with respect to the 20th century average, while the mapping tool displays global-scale temperature anomalies with respect to the 1981-2010 base period. 
For more information on these anomalies, please visit Global Surface Temperature Anomalies. CO2 from ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_annmean_mlo.txt DJIA from https://www.measuringworth.com\n\nScientists claim that changes in the mean global temperature are primarily due to changes in CO2 levels. Both trends are here from 1959 to 2016. For an alternative, the data includes the annual closing price of the Dow Jones Industrial Average. Can it predict global temperature?", + "download": "https://dasl.datadescription.com/download/data/3116", + "filename": "Climate-change-2016", + "name": "Climate change 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A study compared the effectiveness of several antidepressants by examining the experiments in which they had passed the FDA requirements. Each of those experiments compared the active drug with a placebo, an inert pill given to some of the subjects. In each experiment some patients treated with the placebo had improved, a phenomenon called the […] ", - "download": "https://dasl.datadescription.com/download/data/3054", - "filename": "Antidepressants", - "name": "Antidepressants", + "description": "Hurricane frequencies", + "download": "https://dasl.datadescription.com/download/data/3279", + "filename": "Hurricane-frequencies", + "name": "Hurricane frequencies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A student investigated just how effective washing with soap is in eliminating bacteria. 
To do this she tested four different methods—washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). Her experiment consisted of one experimental factor, the […] ", - "download": "https://dasl.datadescription.com/download/data/3561", - "filename": "Baterial-soap", - "name": "Baterial soap", + "description": "Hurricane history", + "download": "https://dasl.datadescription.com/download/data/3280", + "filename": "Hurricane-history", + "name": "Hurricane history", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Measurements of 250 men of various ages. The percent of a man’s body that is fat is a matter of concern for health and fitness. But the %bodyfat is difficult and expensive to measure accurately. These data offer correct %bodyfat measurements along with a variety of easier to find measures. Can you build a model ", - "download": "https://dasl.datadescription.com/download/data/30790", - "filename": "Bodyfat", - "name": "Bodyfat", + "description": "The barometric pressure at the center of a hurricane is often used to measure the strength of the hurricane because it can predict the maximum wind speed of the storm. How well is the wind speed predicted by the barometric pressure? 
", + "download": "https://dasl.datadescription.com/download/data/3278", + "filename": "Hurricanes-2015", + "name": "Hurricanes 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Burger King publishes full nutrition information on its menu. These data are for the foods on the menu recently. (Visit the site listed as the reference for the most current list.) ", - "download": "https://dasl.datadescription.com/download/data/3089", - "filename": "Burger-King-items", - "name": "Burger King items", + "description": "The Los Angeles Almanac reports a number of variables about the weather in LA. Among them is the annual rainfall, reported here for 1991-2018. It is worthwhile to look up any outliers. ", + "download": "https://dasl.datadescription.com/download/data/3555", + "filename": "LA-rainfall", + "name": "LA rainfall", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Nutritionists are concerned that people have a good breakfast. But what does that mean? students collected nutrition information from the nutrition labels of cereals in one supermarket. ", - "download": "https://dasl.datadescription.com/download/data/3107", - "filename": "Cereals", - "name": "Cereals", + "description": "Is global climate change leading to an increase in the number of major hurricanes? 
The data gives the number of hurricanes classified as major hurricanes in the Atlantic Ocean each year from 1944 through 2013, as reported by NOAA: ", + "download": "https://dasl.datadescription.com/download/data/3323", + "filename": "Major-hurricane-2013", + "name": "Major hurricanes 2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Researchers at the University of Denver Infant Study Center wondered whether temperature might influence the age at which babies learn to crawl. Perhaps the extra clothing that babies wear in cold weather would restrict movement and delay the age at which they started crawling. Data were collected on 208 boys and 206 girls. Parents reported the month of the baby’s birth and the age (in weeks) at which their child first crawled. The table gives the average Temperature (°F) when the babies were 6 months old and average Crawling Age (in weeks) for each month of the year.", - "download": "https://dasl.datadescription.com/download/data/3143", - "filename": "Crawling", - "name": "Crawling", + "description": "Tornadoes 2015\nSource: www.nws.noaa.gov/om/hazstats/resources/weather_fatalities.pdf", + "download": "https://dasl.datadescription.com/download/data/3488", + "filename": "Tornadoes", + "name": "Tornadoes 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Life expectancy at birth, TV’s per capita, and doctor’s per capita for countries of the world. Doctors predict life expectancy, but is that causal? TVs also predict life expectancy. 
", - "download": "https://dasl.datadescription.com/download/data/3169", - "filename": "life-expectancy", - "name": "Doctors and life expectancy", + "description": "Tracking hurricanes 2015", + "download": "https://dasl.datadescription.com/download/data/3493", + "filename": "Tracking-hurricanes-2015", + "name": "Tracking hurricanes 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Fertility (births/woman) and Female life expectancy for 219 countries of the world. (Data is available on both variables for only 200). How is life expectancy related to fertility? Are there any outliers and, if so, what do they indicate", - "download": "https://dasl.datadescription.com/download/data/3202", - "filename": "Fertility-and-life-expectancy-2014", - "name": "Fertility and life expectancy 2014", - "number_format": 31, - "remove_quotes": true, + "description": "The National Hurricane Center (NHC) of the National Oceanic and Atmospheric\nAdministration (NOAA) tries to predict the path each hurricane will take. But hurricanes\ntend to wander around aimlessly and are pushed by fronts and other weather\nphenomena in their area, so they are notoriously difficult to predict. Even relatively small changes in a hurricane’s track can make big differences in the damage it causes. The data give the mean error in nautical miles of the NHC’s 72-hour predictions of Atlantic hurricanes for 1970-2017. 
NOAA refers to these errors as the Forecast\nerror or the Prediction error and reports annual results.", + "download": "https://dasl.datadescription.com/download/data/3494", + "filename": "Tracking-hurricanes-2016", + "name": "Tracking hurricanes 2016", + "number_format": 31, + "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Gossett says in his seminal 1908 paper: “Before I had succeeded in solving my problem analytically, I had endeavoured to do so empirically. The material used was a correlation table containing the height and left middle finger measurements of 3000 criminals, from a paper by W. R. MacDonell (Biometrika, Vol. I., p. 219).” His method was to write the 3000 finger length values on cards, shuffle them thoroughly, and the deal out 750 hands of 4 cards. For each hand he then calculated (with a mechanical calculator) the mean and standard deviation. (Note; He divided by n (= 4) and not by n-1 (= 3).) He then found values of ybar – the population mean (which he knew because he had the population; it is 11.5474) and divided each by the standard deviation. The resulting values formed the distribution which he then correctly described.\nThe finger measurements were originally given in mm and the heights in feet and inches. They have been converted to cm (at https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/crimtab.html). 
The midpoint of intervals are used where MacDonnel gives a range of values.", - "download": "https://dasl.datadescription.com/download/data/3204", - "filename": "Fingers-and-Heights", - "name": "Fingers and Heights", + "description": "Tsunamis 2016", + "download": "https://dasl.datadescription.com/download/data/3500", + "filename": "Tsunamis-2016", + "name": "Tsunamis 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Is it true that students\ntend to gain weight during their first year in college? Cornell Professor of Nutrition David Levitsky recruited students from two large sections\nof an introductory health course. Although they were\nvolunteers, they appeared to match the rest of the freshman\nclass in terms of demographic variables such as sex\nand ethnicity. The students were weighed during the first\nweek of the semester, then again 12 weeks later. 
Based\non Professor Levitsky’s data, estimate the mean weight\ngain in first-semester freshmen and comment on the\n“freshman 15.” (Weights are in pounds.)", - "download": "https://dasl.datadescription.com/download/data/3218", - "filename": "Freshman-15", - "name": "Freshman 15", + "description": "http://www.ngdc.noaa.gov/hazard/tsu_db.shtml Extracted Event Validity 3 and 4 Cause Codes 1-5 Event Validity: 4 = definite tsunami 3 = probable tsunami 2 = questionable tsunami 1 = very doubtful tsunami 0 = event that only caused a seiche or disturbance in an inland river -1 = erroneous entry Cause Code: Valid values: 0 to 11 The source of the tsunami: 0 = Unknown 1 = Earthquake 2 = Questionable Earthquake 3 = Earthquake and Landslide 4 = Volcano and Earthquake 5 = Volcano, Earthquake, and Landslide 6 = Volcano 7 = Volcano and Landslide 8 = Landslide 9 = Meteorological 10 = Explosion 11 = Astronomical Tide", + "download": "https://dasl.datadescription.com/download/data/3501", + "filename": "Tsunamis-2018", + "name": "Tsunamis 2018", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "For humans, pregnancy lasts about 280 days. In other species of animals, the length of time from conception to birth varies. Is there any evidence that the gestation period is related to the animal’s life span? 
The data give Gestation Period (in days) and Life Expectancy (in years) for 18 species of mammals.", - "download": "https://dasl.datadescription.com/download/data/3241", - "filename": "Gestation_", - "name": "Gestation", + "description": "Weather forecasts", + "download": "https://dasl.datadescription.com/download/data/3519", + "filename": "Weather-forecasts", + "name": "Weather forecasts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Physical therapists measure a patient’s manual dexterity with a simple task. The patient\npicks up small cylinders from a 4 * 4 frame with one hand, flips them over (still with one\nhand), and replaces them in the frame. The task is timed for all 16 cylinders. The tool was originally normed for adults. In a follow-up study, researchers\nused this tool to study how dexterity improves with age in children and establish norms against which to compare a patient’s dexterity.", - "download": "https://dasl.datadescription.com/download/data/3253", - "filename": "Hand-dexterity", - "name": "Hand dexterity", + "description": "Wind speed", + "download": "https://dasl.datadescription.com/download/data/3528", + "filename": "Wind-speed", + "name": "Wind speed", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Weather" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Fitting someone for a hearing aid requires assessing the patient’s hearing ability. In one method of assessment, the patient listens to a tape of 50 English words. 
The tape is played at low volume, and the patient is asked to repeat the words. The patient’s hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", - "download": "https://dasl.datadescription.com/download/data/3261", - "filename": "Hearing", - "name": "Hearing", + "description": "The annual number of deaths from floods in the United States from 1995 through 2015. Years are not provided, but the data values are in time order.", + "download": "https://dasl.datadescription.com/download/data/3211", + "filename": "Floods-2015", + "name": "Floods 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Fitting someone for a hearing aid requires assessing the patient’s hearing ability. In one method of assessment, the patient listens to a tape of 50 English words. The tape is played at low volume, and the patient is asked to repeat the words. The patient’s hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. 
These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", - "download": "https://dasl.datadescription.com/download/data/3262", - "filename": "Hearing-4-lists", - "name": "Hearing 4 lists", + "description": "Climate scientists have been observing the extent of sea ice using satellite observations. Many have expressed concern because, since 1980, the extent of sea ice has declined precipitously—possibly due to global climate change. But a multiple regression of Extent on temp and year gives a coefficient for temp that is essentially zero. ", + "download": "https://dasl.datadescription.com/download/data/3443", + "filename": "Sea-ice", + "name": "Sea ice", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data hold measurements on people of various ages. The main variable of interest is the level of insulin-like growth factor (igƒ) (J. Clin. Endocrinol. Metab. 78(3): 744–752, March 1994). Each row in the data set corresponds to one individual. See also Igf13, which concentrates on children. ", - "download": "https://dasl.datadescription.com/download/data/3562", - "filename": "Igf", - "name": "Igf", + "description": "As part of the course work, a class at an upstate\nNY college collects data on streams each year. 
Students\nrecord a number of biological, chemical, and physical variables,\nincluding the stream name, the substrate of the stream\n(limestone (L), shale (S), or mixed (M)), the pH, the temperature\n(°C), and the BCI, a measure of biological diversity.", + "download": "https://dasl.datadescription.com/download/data/3463", + "filename": "Streams", + "name": "Streams", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Waters" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Measurements on children under 13 years of age. Most of the data was collected from physical examinations in schools. The main variable of interest is the level of insulin-like growth factor (igƒ) (J. Clin. Endocrinol. Metab. 78(3): 744–752, March 1994). Each row in the data set corresponds to one individual. See also the dataset Igf, which includes adults.", - "download": "https://dasl.datadescription.com/download/data/3563", - "filename": "Igf13", - "name": "Igf13", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3074", + "filename": "Bird-Species-2013", + "name": "Bird-Species-2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Homer’s Iliad is an epic poem, compiled around 800 BCE, that describes several weeks of the last year of the 10-year siege of Troy (Ilion) by the Achaeans. The story centers on the rage of the great warrior Achilles. But it includes many details of injuries and outcomes, and is thus the oldest record of Greek medicine. 
The data report 146 recorded injuries for which both injury site and outcome are provided in the Illiad. Are some kinds of injuries more lethal than others?", - "download": "https://dasl.datadescription.com/download/data/3281", - "filename": "Illiad-Injuries", - "name": "Illiad Injuries", + "description": "The ranges inhabited by the Indian gharial\ncrocodile and the Australian saltwater crocodile overlap in\nBangladesh. Suppose a very large crocodile skeleton is found\nthere, and we wish to determine the species of the animal.\nWildlife scientists have measured the lengths of the heads\nand the complete bodies of several crocs (in centimeters) of\neach species.\n", + "download": "https://dasl.datadescription.com/download/data/3147", + "filename": "Crocodile-lengths", + "name": "Crocodile lengths", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In 1974, the Bellevue-Stratford Hotel in Philadelphia was the scene of an outbreak of\nwhat later became known as legionnaires’ disease. The cause of the disease was finally discovered to be bacteria that thrived in the air-conditioning units of the hotel.\nOwners of the Rip Van Winkle Motel, hearing about the Bellevue-Stratford, replace their air-conditioning system. The data are the bacteria counts in the air of eight rooms, before and after a new air-conditioning system was installed (measured in colonies per cubic foot of air). Has the new system has succeeded in lowering the bacterial count?", - "download": "https://dasl.datadescription.com/download/data/3310", - "filename": "Legionnaires-disease", - "name": "Legionnaires disease", + "description": "In 2004, a team of researchers published a study of contaminants in farmed salmon. Fish from many sources were analyzed for 14 organic contaminants. 
The study\nexpressed concerns about the level of contaminants found. One of those was the\ninsecticide mirex, which has been shown to be carcinogenic and is suspected to be\ntoxic to the liver, kidneys, and endocrine system. The dataset holds 153 observed salmon samples and reports concentrations of a number of contaminants.", + "download": "https://dasl.datadescription.com/download/data/3199", + "filename": "Farmed-salmon", + "name": "Farmed salmon", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In 2015 the Council of Europe published a report entitled The European School Survey Project on Alcohol and Other Drugs (www.espad.org). Among other issues, the survey investigated the percent-ages of 16-year-olds who had used marijuana. The data are the results for 38 European countries. ", - "download": "https://dasl.datadescription.com/download/data/3326", - "filename": "Marijuana-2015", - "name": "Marijuana 2015", + "description": "Wildlife researchers monitor many wildlife populations by taking aerial photographs. Can they estimate the weights of alligators accurately from the air? Here are data on the Weight of alligators (in pounds) and their Length (in inches). ", + "download": "https://dasl.datadescription.com/download/data/3236", + "filename": "Gators", + "name": "Gators", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Researchers in Food Science studied how big people’s mouths tend to be. They measured mouth volume by pouring water into the mouths of subjects who lay on their backs. 
Unless this is your idea of a good time, it would be helpful to have a model to estimate mouth volume more simply. Fortunately, mouth volume is related to height. (Mouth volume is measured in cubic centimeters and height in meters.)", - "download": "https://dasl.datadescription.com/download/data/3345", - "filename": "Mouth-volume", - "name": "Mouth volume", + "description": "The Maine lobster fishing industry is carefully controlled and licensed, and facts about it have been recorded for more than a century, so it is an important industry that we can examine in detail. The dataset holds annual data ", + "download": "https://dasl.datadescription.com/download/data/3317", + "filename": "Lobsters-2016", + "name": "Lobsters 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A hospital in Nashville is considering changes to the prenatal care they offer. They collected the gestation times of 70 pregnancies that ended in live births. The established human gestation time is 266 days. ", - "download": "https://dasl.datadescription.com/download/data/3359", - "filename": "Nashville", - "name": "Nashville", + "description": "Manatees are gentle mammals that live in the waters off the coast of Florida and a few other places. Unfortunately, many are killed each year in collisions with powerboats. Marine biologists warn that the growing number of powerboats registered in Florida threatens the existence of manatees. The data here are the number of manatees killed each year since 1982 and the number of powerboats registered in Florida (in thousands) for those years. 
Is there a relationship?", + "download": "https://dasl.datadescription.com/download/data/3325", + "filename": "Manatees-2015", + "name": "Manatees 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Neck size", - "download": "https://dasl.datadescription.com/download/data/3360", - "filename": "Neck-size", - "name": "Neck size", + "description": "Psychology experiments sometimes involve testing the\nability of rats to navigate mazes. The mazes are classified\naccording to difficulty, as measured by the mean length of\ntime it takes rats to find the food at the end. One researcher\nneeded a maze that will take rats an average of about one minute\nto solve. He tested one maze on several rats, collecting the\ndata provided.", + "download": "https://dasl.datadescription.com/download/data/3333", + "filename": "Maze", + "name": "Maze", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Paralyzed veterans", - "download": "https://dasl.datadescription.com/download/data/3388", - "filename": "Paralyzed-veterans", - "name": "Paralyzed veterans", + "description": "Can pleasant smells improve learning? Researchers timed 21 subjects as they tried to complete paper-and-pencil mazes. Each subject attempted a maze both with and without the presence of a floral aroma. Subjects were randomized with respect to whether they did the scented trial first or second. 
Is there any evidence that the floral scent improved the subjects’ ability to complete the mazes?", + "download": "https://dasl.datadescription.com/download/data/3334", + "filename": "Mazes-smells", + "name": "Mazes and smells", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Paralyzed Veterans of America (PVA) is a Congressionally chartered veterans’ service organization that represents the interests of paralyzed veterans. The agency provides a range of services to veterans who have spinal cord injury or dysfunction. It derives most of its funding from contributions. The data set PVA contains a sample of the data on donors who recently gave money to the organization.", - "download": "https://dasl.datadescription.com/download/data/3415", - "filename": "PVA", - "name": "PVA", + "description": "Emperor penguins are the most accomplished divers among birds, making routine\ndives of 5–12 minutes, with the longest recorded dive over 27 minutes. These\nbirds can also dive to depths of over 500 meters! Since air-breathing animals like\npenguins must hold their breath while submerged, the duration of any given dive\ndepends on how much oxygen is in the bird’s body at the beginning of the dive, how\nquickly that oxygen gets used, and the lowest level of oxygen the bird can tolerate.\nThe rate of oxygen depletion is primarily determined by the penguin’s heart rate.\nConsequently, studies of heart rates during dives can help us understand how these\nanimals regulate their oxygen consumption in order to make such impressive dives. The researchers equipped emperor penguins with devices that record their heart rates during\ndives. 
The dataset reports Dive Heart Rate (beats per minute), the Duration\n(minutes) of dives, and other related variables.", + "download": "https://dasl.datadescription.com/download/data/3391", + "filename": "Penguins", + "name": "Penguins", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "People with spinal cord injuries may lose function in some, but not all, of their muscles. The ability to push oneself up is particularly important for shifting position when seated and for transferring into and out of wheelchairs. Surgeons compared two operations to restore the ability to push up in children. ", - "download": "https://dasl.datadescription.com/download/data/3479", - "filename": "Tendon-transf", - "name": "Tendon transfers", + "description": "Salmon", + "download": "https://dasl.datadescription.com/download/data/3435", + "filename": "Salmon", + "name": "Salmon", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Drug interaction study of a new and a standard oral contraceptive \ntherapy. See the \"STORY BEHIND THE DATA\" and \"PEDAGOGICAL NOTES\" \nsections below for details.", - "download": "http://jse.amstat.org/datasets/ocdrug.dat.txt", - "filename": "ocdrug", - "name": "Drug Interaction", + "description": "The number of storks in Oldenburg, Germany, plotted against the population of the town for 7 years in the 1930s. Do storks bring babies? 
", + "download": "https://dasl.datadescription.com/download/data/3462", + "filename": "Storks", + "name": "Storks", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Bacteria are cultured in medical laboratories to identify them so patients can be treated \ncorrectly. The tryptone dataset contains measurements of bacteria counts following the \nculturing of five strains of Staphylococcus aureus. There are many strains of \nStaphylococcus aureus; five were used by the experimenter. They are identified by numbers \nin the data because their names are too complicated to be useful as identifiers. The \ndataset also contains the time of incubation, temperature of incubation and concentration \nof tryptone, a nutrient. The protocols for culturing this bacteria, set the time at 24 \nhours, the temperature at 35 degrees and the tryptone concentration at 1.0%. The question \nis whether the conditions recommended in the protocols for the culturing of these strains \nare optimal. The task is to find the incubation time, temperature and tryptone concentration \nthat optimises the growth of this Bacterium.", - "download": "http://jse.amstat.org/datasets/Tryptone.dat.txt", - "filename": "Tryptone", - "name": "The Tryptone Task ", + "description": "Large herds of wild horses can become a problem on some federal lands in the West. Researchers hoping to improve the management of these herds collected data to see if they could predict the number of foals that would be born based on the size of the current herd. 
", + "download": "https://dasl.datadescription.com/download/data/3524", + "filename": "Wild-horses", + "name": "Wild horses", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Animals" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch1b.dat is the waste site file, and contains the \nfollowing variables. There are NO missing values.\n\nx: Real, x-coordinate of location of an inactive hazardous waste\nsite containing trichloroethylene (TCE).\n\ny: Real, y-coordinate of location of an inactive hazardous waste\nsite containing trichloroethylene (TCE).\n\nsite: Integer, numerical label of waste site.\n Key: Site 1: Monarch Chemicals\n Site 2: IBM Endicott\n Site 3: Singer\n Site 4: Nesco\n Site 5: GE Auburn\n Site 6: Solvent Savers\n Site 7: Smith Corona\n Site 8: Victory Plaza\n Site 9: Hadco\n Site 10: Morse Chain\n Site 11: Groton", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch1b.dat", - "filename": "Disease-Clusters", - "name": "Spatial Pattern Analysis to Detect Rare Disease Clusters", + "description": "A biology student studied the effect of 10 different fertilizers on the growth of mung bean sprouts. She sprouts 12 beans in each of 10 different petri dishes, and adds the same amount of fertilizer to each dish. After one week she measures the heights of the 120 sprouts in millimeters. 
", + "download": "https://dasl.datadescription.com/download/data/3203", + "filename": "Fertilizers", + "name": "Fertilizers", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch17.dat contains the following 15 variables:\n\nVariable Description\n\nOBS Observation number\nCN Center obtaining and reading the scan\nID Scan ID\nBA1 Bone area (sq cm) from centralized Reader 1\nBA2 Bone area (sq cm) from centralized Reader 2\nBA3 Bone area (sq cm) from centralized Reader 3\nBC1 Bone mineral content (gm) from centralized Reader 1\nBC2 Bone mineral content (gm) from centralized Reader 2\nBC3 Bone mineral content (gm) from centralized Reader 3\nBMD1 Bone mineral density (gm/sq cm) from centralized Reader 1\nBMD2 Bone mineral density (gm/sq cm) from centralized Reader 2\nBMD3 Bone mineral density (gm/sq cm) from centralized Reader 3\nBA Bone area (sq cm) from participating center\nBC Bone mineral content (gm) from participating center\nBMD Bone mineral density (gm/sq cm) from participating center\n", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch17.dat", - "filename": "Bone-Mineral", - "name": "Quality Control for Bone Mineral Density Scans", + "description": "In 1936 Sir Ronald Fisher presented data on irises as the example in a famous statistics paper. Ever since, “Fisher’s Iris data” have been a feature of statistics texts. Fisher presents 4 measurements of Iris flowers of three species. Can we differentiate the species? 
If so, how best to do that?", + "download": "https://dasl.datadescription.com/download/data/3206", + "filename": "Fisher-Irises", + "name": "Fisher’s Irises", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch21a.dat contains the spontaneous activity and rectal\ntemperature data (416 observations of 6 variables) There are no missing values.\n\nVariable List:\n\nOBS:\t\tObservation identification number.\n\nMORPHINE:\tDose of morphine sulfate (mg/kg) injected into study mice. The \n\t\trange is 0 to 8.0.\n\nDEL9_THC:\tDose of Delta9-THC (mg/kg) injected into study mice. The \n\t\trange is from 0 to 15.0.\n\nREP:\t\tIdentification of study replication. The entire 5x7 factorial \n\t\tdesign was replicated.\n\nSPON_ACT:\tSpontaneous Activity as defined by the number of interruptions \n\t\tof a photocell beam in a clear plastic cage over a 10 minute \n\t\tperiod of time.\n\nTEMP_B:\t\tRectal Temperature at baseline (just prior to treatment).\n\nTEMP_60:\tRectal Temperature at 60 minutes post treatment injection.\n\n\n", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch21a.dat", - "filename": "Drug-Interactions", - "name": "Drug Interactions Between Morphine and Marijuana\n", + "description": "The Hopkins Memorial Forest is a 2500-acre reserve in Massachusetts, New York, and Vermont managed by the Williams College Center for Environmental Studies (CES). As part of its mission, the CES monitors forest resources and conditions over the long term. 
", + "download": "https://dasl.datadescription.com/download/data/3271", + "filename": "Hopkins-Forest", + "name": "Hopkins Forest", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch21b.dat contains the tail-flick data (510 observations of \n6 variables) Missing data are encoded with a period. \n\nVariable List:\n\nOBS:\t\tObservation identification number.\n\nREP:\t\tIdentification of study. Two 5x7 factorial experiments and one \n\t\t5x5 factorial experiment are included.\n\nMORPHINE:\tDose of morphine sulfate (mg/kg) injected into study mice. The \n\t\trange is 0 to 8.0.\n\nDEL9_THC:\tDose of Delta9-THC (mg/kg) injected into study mice. The \n\t\trange is from 0 to 15.0.\n\nFLICK_C:\tControl Flick Time. The number of seconds required for the \n\t\tmouse to flick it tail from beneath a heat stimulus prior to \n\t\ttreatment.\n\nFLICK_T::\tTest Flick Time. The number of seconds required for the \n\t\tmouse to flick it tail from beneath a heat stimulus post \n\t\ttreatment. A 10 sec maximum latency was imposed.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch21b.dat", - "filename": "Drug-Interactions-2", - "name": "Drug Interactions Between Morphine and Marijuana\n", + "description": "One can determine how old a tree is by counting its rings, but that requires either cutting the tree down or extracting a sample from the tree’s core. Can we estimate the tree’s age simply from its diameter? A forester measured 27 trees of the same species that had been cut down, and counted the rings to determine the ages of the trees. 
", + "download": "https://dasl.datadescription.com/download/data/3277", + "filename": "old-tree", + "name": "How old is that Tree", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Other" - }, - { - "datasets": [ + "use_first_row_for_vectorname": true + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Why do older people often seem not to remember things as well as younger people? Do they not pay attention? Do they just not process the material as thoroughly? One theory regarding memory is that verbal material is remembered as a function of the degree to which is was processed when it was initially presented. Eysenck (1974) randomly assigned 50 younger subjects and 50 older (between 55 and 65 years old) to one of five learning groups. The Counting group was asked to read through a list of words and count the number of letters in each word. This involved the lowest level of processing. The Rhyming group was asked to read each word and think of a word that rhymed with it. The Adjective group was asked to give an adjective that could reasonably be used to modify each word in the list. The Imagery group was instructed to form vivid images of each word, and this was assumed to require the deepest level of processing. None of these four groups was told they would later be asked to recall the items. Finally, the Intentional group was asked to memorize the words for later recall. After the subjects had gone through the list of 27 items three times they were asked to write down all the words they could remember. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nYounger or Older\n\nProcess\n\nThe level of processing: Counting, Rhyming, Adjective, Imagery or Intentional\n\nWords\n\nNumber of words recalled\n", - "download": "http://www.statsci.org/data/general/eysenck.txt", - "filename": "eysenck", - "name": "Age and Memory", + "description": "As the number of oranges on a tree increases, the fruit tends to get smaller. The dataset gives numbers of oranges/tree and average weight/orange (in pounds).", + "download": "https://dasl.datadescription.com/download/data/3385", + "filename": "Oranges", + "name": "Oranges", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Nolen-Hoeksema and Morrow (1991) had the good fortune to have measured depression among college students 2 weeks before the Loma Prieta earthquake in California in 1989. Nolen-Hoeksema and Morrow collected repeat data to track the students’ adjustments to the earthquake. Measurements were taken every 3 weeks starting 2 weeks before the earthquake to 10 weeks after. The data were recreated by Howell (1999) based on the Nolen-Hoeksema and Morrow findings. 
Each row gives the depression scores for one student.\n\n\nVariable\n\nDescription\n\n\n\n\n\nWeek0\n\nDepression scores 2 weeks before the earthquake\n\nWeek3\n\nDepression scores one week the quake\n\nWeek6\n\nDepression scores 4 weeks after the quake\n\nWeek9\n\nDepression scores 7 weeks after the quake\n\nWeek12\n\nDepression scores 10 weeks after the quake\n", - "download": "http://www.statsci.org/data/general/lomaprie.txt", - "filename": "lomaprie", - "name": "Depression Before and After an Earthquake", + "description": "An experiment on mung beans was performed\nto investigate the environmental effects of salinity and\nwater temperature on sprouting. Forty beans were randomly\nallocated to each of 36 petri dishes that were subject\nto one of four levels of Salinity (0, 4, 8, and 12 ppm)\nand one of three Temperatures (32°, 34°, or 36° C). After\n48 hours, the biomass of the sprouts in gm was measured. The percent of beans germinating is also recorded.", + "download": "https://dasl.datadescription.com/download/data/3458", + "filename": "Sprouts", + "name": "Sprouts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": " In a random sample of U.S. adults surveyed in December 2011, Pew Research asked how important it is “to you personally” to be successful in a high-paying career or profession. Responses are recorded by sex and age. 
", - "download": "https://dasl.datadescription.com/download/data/3071", - "filename": "Being-successful", - "name": "Being successful", + "description": "Tree growth", + "download": "https://dasl.datadescription.com/download/data/3497", + "filename": "Tree-growth", + "name": "Tree growth", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A researcher at Cornell University wanted to know how friendship might affect simple sales such as this. She randomly divided subjects into two groups and gave each group descriptions of items they might want to buy. One group was told to imagine buying from a friend whom they expected to see again. The other group […] ", - "download": "https://dasl.datadescription.com/download/data/3090", - "filename": "Buy-from-a-friend", - "name": "Buy from a friend", + "description": "Vineyards", + "download": "https://dasl.datadescription.com/download/data/3513", + "filename": "Vineyards", + "name": "Vineyards", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Plants" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The September 1998 issue of the American T\nPsychologist published an article by Kraut et al. that\nreported on an experiment examining “the social and\npsychological impact of the Internet on 169 people in\n73 households during their first 1 to 2 years online.” In the\nexperiment, 73 households were offered free Internet access\nfor 1 or 2 years in return for allowing their time and activity\nonline to be tracked. 
The members of the households who\nparticipated in the study were also given a battery of tests\nat the beginning and again at the end of the study. The\nconclusion of the study made news headlines: Those who\nspent more time online tended to be more depressed at the\nend of the experiment.\nThe news reports about this study clearly concluded that\nusing the Internet causes depression. Is such a conclusion warranted?", - "download": "https://dasl.datadescription.com/download/data/3158", - "filename": "Depression-and-the-internet", - "name": "Depression and the internet", + "description": "Meteor Crater in Arizona was the first recognized impact crater and was identified as such only in the 1920s. With the help of satellite images, more and more craters have been identified; now more than 180 are known. These, of course, are only a small sample of all the impacts the earth has experienced: Only 29% of earth’s surface is land, and many craters have been covered or eroded away. Astronomers have recognized a roughly 35 million-year cycle in the frequency of cratering, although the cause of this cycle is not fully understood.\nThe data hold information about craters. Craters from the most recent 35Ma (million years) may be the more reliable data, and are suitable for analyses relating age and diameter.", + "download": "https://dasl.datadescription.com/download/data/3142", + "filename": "Craters", + "name": "Craters", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Geology" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A Harvard psychologist recruited 75 female hotel maids to participate in a study. 
She randomly selected 41 of them, whom she informed (truthfully) that the work they do satisfies the Surgeon General’s recommendations for an active lifestyle, providing examples to show that their work is good exercise. The other 34 were told nothing. Various characteristics, such as weight, body fat, body mass index and blood pressure were recorded at the start of the study and again after four weeks. The researcher was interested in whether the information she provided would result in measurable physical changes. If there is a difference, it might challenge our understanding of the placebo effect because being informed could make a difference.", - "download": "https://dasl.datadescription.com/download/data/3273", - "filename": "Hotel-maids", - "name": "Hotel maids", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In an experiment to test ginkgo bloba, subjects were assigned randomly to take ginkgo biloba supplements or a placebo. Their memory was tested to see whether it improved. ", - "download": "https://dasl.datadescription.com/download/data/3335", - "filename": "Memory", - "name": "Memory", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The New York Times combined survey data (economix.blogs.nytimes.com/2013/\n07/10/working-parents-wanting-fewer-hours/) with data from\nthe U.S. Bureau of Labor Statistics (BLS) (www.bls.gov/news\n.release/archives/famee_04262013.htm) comparing how mothers\nand fathers would like to allocate their time compared with\nwhat they actually do. 
They asked a sample of parents with\nchildren 18 or under:\n“If money were no object, and you were free to do whatever\nyou wanted, would you stay at home, would you work full\ntime, or would you work part time?”\nPercent of respondents to this question choosing each\nalternative are reported in the “Desire” columns of the table.\nData in the “Actual” column are from the BLS. (Note:\n“Unemployed” = unemployed and actively seeking work.)\nThe table reports column percents (which may not add to\n100% due to rounding)", - "download": "https://dasl.datadescription.com/download/data/3344", - "filename": "Mothers-fathers-aspirations", - "name": "Mothers and fathers aspirations", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In a study published in the journal Psychological Science, Rauscher, Shaw, and Ky reported that when students were given a spatial reasoning section of a standard IQ test, those who listened to Mozart for 10 minutes improved their scores more than those who simply sat quietly. ", - "download": "https://dasl.datadescription.com/download/data/3350", - "filename": "Mozart", - "name": "Mozart", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Researchers interviewed participants to find some who reliably fell asleep and awoke on one side and who could remember their dreams. They found 63 participants, of whom 41 were right-side sleepers and 22 slept on their left side. Then they interviewed them about their dreams. 
Of the 41 right-side sleepers, only 6 reported often having nightmares. But of the 22 left-side sleepers 9 reported nightmares. Is the difference significant?", - "download": "https://dasl.datadescription.com/download/data/3366", - "filename": "Nightmares", - "name": "Nightmares", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Stereograms appear to be composed entirely of\nrandom dots. However, they contain separate images that a\nviewer can “fuse” into a three-dimensional (3D) image by staring\nat the dots while defocusing the eyes. An experiment was\nperformed to determine whether knowledge of the embedded\nimage affected the time required for subjects to fuse the images.\nOne group of subjects (group NV) received no information or\njust verbal information about the shape of the embedded object.\nA second group (group VV) received both verbal information\nand visual information (specifically, a drawing of the object).\nThe experimenters measured how many seconds it took for the\nsubject to report that he or she saw the 3D image.", - "download": "https://dasl.datadescription.com/download/data/3459", - "filename": "Stereograms", - "name": "Stereograms", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3094", + "filename": "Carbon-footprint", + "name": "Carbon footprint", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch20.dat contains the following variables:\n\nid subject identifier\nclinical indicator for selection into clinical sample:\n 1=in clinical sample; 
0=not in clinical sample\nstratum stratum membership:\n 1=high screen; 2=low screen blacks;\n 3=low screen whites\nrace subject's self-reported race:\n 1=white; 2=black\ngender subject's gender:\n 1=male; 2=female\nrparents subject's guardian status:\n 1=does not live with both natural parents;\n 0=lives with both natural parents\ncesdtot subject's total center for epidemiologic studies depression\n scale score (range 0-60)\ncohtot subject's total cohesion score, based on faces-ii\n (range 16-80)\nmdd clinical diagnosis of major depression:\n 1=positive diagnosis; 0=negative diagnosis\n 9=missing for subjects not in clinical sample\nweight sampling weights used in logistic regression; defined as\n number of subjects in screening sample in each stratum", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch20.dat", - "filename": "Adolescent-Depression", - "name": "Two-Stage Sampling Designs for Adolescent Depression Studies", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Psychology" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study examined the health risks of smoking measured the cholesterol levels of people who had smoked for at least 25 years and people of similar ages who had smoked for no more than 5 years and then stopped", - "download": "https://dasl.datadescription.com/download/data/3111", - "filename": "Cholesterol-and-smoking", - "name": "Cholesterol and smoking", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3095", + "filename": "Carbon-footprint-2015", + "name": "Carbon footprint 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { 
"DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Data on 816 brands of cigarettes. What relationships are there among the nicotine content, tars, and CO? Are any brands unusually high or low in nicotine? Can you account for that? ", - "download": "https://dasl.datadescription.com/download/data/3113", - "filename": "Cigarettes", - "name": "Cigarettes", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3240", + "filename": "Gemstones", + "name": "Gemstones", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Researchers measured the concentration (nanograms per milliliter) of cotinine in the blood\nof three groups of people: nonsmokers who have not been exposed to smoke, nonsmokers\nwho have been Exposed To Smoke (ETS), and smokers. Cotinine is left in the blood when\nthe body metabolizes nicotine, so its value is a direct measurement of the effect of passive smoke exposure.", - "download": "https://dasl.datadescription.com/download/data/3389", - "filename": "Passive-smoke", - "name": "Passive smoke", + "description": "It is a common belief that Yellowstone’s most famous geyser erupts once an hour at very predictable intervals. But, in fact, the intervals between eruptions can vary greatly. Can we predict the interval from, for example, the duration of the previous eruption? Are there other patterns in the data worth noting? 
", + "download": "https://dasl.datadescription.com/download/data/3380", + "filename": "Old-Faithful", + "name": "Old Faithful", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Centers for Disease Control and Prevention\ntrack cigarette smoking in the United States. How has the percentage of people who smoke changed since the danger became clear during the last half of the 20th\ncentury? The data give percentages of smokers among\nmen 18–24 years of age, as estimated by surveys, from 1965\nthrough 2014.", - "download": "https://dasl.datadescription.com/download/data/3455", - "filename": "Smoking-2014", - "name": "Smoking 2014", + "description": "Ozone levels (in parts per billion, ppb) were recorded at sites in New Jersey monthly between 1926 and 1971. Here are boxplots of the data for each month (over the 46 years), lined up in order (January = 1): ", + "download": "https://dasl.datadescription.com/download/data/3386", + "filename": "Ozone", + "name": "Ozone", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "There has been a steady decline the the percentage of pregnant mothers who smoke. These data document the trend. The run only until 2011, which appears to be the latest date for which the CDC has data. ", - "download": "https://dasl.datadescription.com/download/data/3456", - "filename": "Smoking-and-Pregnancy-2011", - "name": "Smoking and Pregnancy 2011", + "description": "The National Interagency Fire Center reports statistics about wildfires. 
They report data from 1960, but the years 1960-1984 are so different from subsequent years that they can’t be analyzed together. These data are for 1985-2015. Is there a pattern over time? What is the relationship between the number of fires and the acres affected? Are fires getting larger or smaller on average?", + "download": "https://dasl.datadescription.com/download/data/3523", + "filename": "Wildfires-2015", + "name": "Wildfires 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Measurements of weight and tar, nicotine, and carbon monoxide content\nare given for 25 brands of domestic cigarettes.", - "download": "http://jse.amstat.org/datasets/cigarettes.dat.txt", - "filename": "Cigarette_", - "name": "Cigarette data for an introduction to multiple regression", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. 
\nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19a.dat", - "filename": "never-smokers", - "name": "never-smokers", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19b.dat", - "filename": "current-smokers-m", - "name": "current smokers: male", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. 
\nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19c.dat", - "filename": "current-smokers-f", - "name": "current smokers: female", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. 
\nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19d.dat", - "filename": "former-smokers-mnc", - "name": "former smokers: male, no college", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19e.dat", - "filename": "former-smokers-msc", - "name": "former smokers: male, some college ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. 
\nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19f.dat", - "filename": "former-smokers-fnc", - "name": "former smokers: female, no college", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. 
\nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19g.dat", - "filename": "former-smokers-fsc", - "name": "former smokers: female, some college", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false } ], - "subcategory_name": "Smoking" + "subcategory_name": "Other" } ] }, { - "category_name": "Nature", + "category_name": "Statistics", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Daily rainfall (in millimetres) was recorded over a 47-year period in Turramurra, Sydney, Australia. For each year, the wettest day was identified (that having the greatest rainfall). The data show the rainfall recorded for the 47 annual maxima.", - "download": "http://www.statsci.org/data/oz/sydrain.txt", - "filename": "sydrain", - "name": "Annual Maximums of Daily Rainfall in Sydney", - "number_format": 31, - "remove_quotes": true, - "separator": "auto", - "simplify_whitespaces": true, - "skip_empty_parts": false, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data were collected in a cloud-seeding experiment in Tasmania between mid-1964 and January 1971. The rainfalls are period rainfalls in inches. 
\nSeeded\n - \nS = seeded, U = unseeded\nSeason\n - \nAutumn, Winter, Spring Summer\nTE\n - \nrainfall in east target area\nTW\n - \nrainfall in west target area\nNC\n - \nrainfall in north control area\nSC\n - \nrainfall in south control area\nNWC\n - \nrainfall in north-west conrol area\n\n", - "download": "http://www.statsci.org/data/oz/cloudtas.txt", - "filename": "cloudtas", - "name": "Cloud Seeding in Tasmania", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data are monthly averaged atmospheric pressure differences between Easter Island and Darwin, Australia. This difference drives the trade winds in the southern hemisphere. An annual cycle may be expected, and also longer cycles corresponding to the El Nino and to the Southern Oscillations. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPressure\n\nMonthly average atmospheric pressure differences\n", - "download": "http://www.statsci.org/data/oz/enso.txt", - "filename": "enso", - "name": "Pressure Difference between Easter Island and Darwin", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily rainfall for Melbourne, from 1 January 1981 to 31 December 1990. 
Note that this series is 3 observations longer than the temperature series.", - "download": "http://www.statsci.org/data/oz/melbrain.txt", - "filename": "melbrain", - "name": "Melbourne Daily Rainfall", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily minimum and maximum temperatures for Melbourne, from 1 January 1981 to 31 December 1990. The two February 29 leap days are excluded, so there are 10 x 365 = 3650 observations.", - "download": "http://www.statsci.org/data/oz/melbtemp.txt", - "filename": "melbtemp", - "name": "Melbourne Temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Rainfall for each 6-day period for Adelaide from 1839 to 1977 inclusive. December 31 of the previous year is included in the non-leap years to make 15 6-day periods for each year. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1839 - 1977\n\nPeriod\n\n1 - 61 for each year\n\nRainfall\n\nRainfall in \n", - "download": "http://www.statsci.org/data/oz/adelrain.txt", - "filename": "adelrain", - "name": "Adelaide Rainfall", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily 6am and 3pm temperatures for Brisbane for the decade 1977 - 1986. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDay\n\nDay as YearMonthDay\n\nTemp06\n\n6am Temperature in degrees Celsius x 10\n\nTemp15\n\n3pm Temperature in degrees Celsius x 10\n", - "download": "http://www.statsci.org/data/oz/bristemp.txt", - "filename": "bristemp", - "name": "Brisbane Temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The columns in the data set represent the precipitation weighted mean concentrations of ions for the year 1986, for 47 sites in the United Kingdom. \n \nVariable\n \nDescription\n\nSite\n\nSite number \n\nRain\n\nRain (measured in mm) \n\nH\n\nH+ \n\nSO4\n\nSO4-2 \n\nNO3\n\nNO3- \n\nNH4\n\nNH4+ \n\nx\n\nx-coordinate (measured in cm) \n\ny\n\ny-coordinate (cm) \n\nThe measurement of NH4+ for site number 35 was not available and is represented by NA in the data set. The x- and y-cordinates were measured in cm from a map of the UK. ", - "download": "http://www.statsci.org/data/general/rainuk.txt", - "filename": "rainuk", - "name": "Acid Rain in the UK", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Froliger and Kane measured the pH (a scale on which a value of 7 is neutral and values below 7 are acidic) of water collected from precipitation events in Allegheny County, Pennsylvania between December 20, 1973 and May 23, 1974. Display the distribution of these values and describe with words and numbers what you see. 
", - "download": "https://dasl.datadescription.com/download/data/3041", - "filename": "acid-rain", - "name": "Acid rain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the average January Temperature (in degrees Fahrenheit) and Latitude (in degrees north of the equator) for 59 U.S. cities. How are they related? ", - "download": "https://dasl.datadescription.com/download/data/3114", - "filename": "City-climate", - "name": "City climate", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3115", - "filename": "City-temperatures", - "name": "City temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Global temperature from https://www.ncdc.noaa.gov/cag/data-info/global Global temperature anomaly data come from the Global Historical Climatology Network-Monthly (GHCN-M) data set and International Comprehensive Ocean-Atmosphere Data Set (ICOADS), which have data from 1880 to the present. These two datasets are blended into a single product to produce the combined global land and ocean temperature anomalies. 
The available timeseries of global-scale temperature anomalies are calculated with respect to the 20th century average, while the mapping tool displays global-scale temperature anomalies with respect to the 1981-2010 base period. For more information on these anomalies, please visit Global Surface Temperature Anomalies. CO2 from ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_annmean_mlo.txt DJIA from https://www.measuringworth.com\n\nScientists claim that changes in the mean global temperature are primarily due to changes in CO2 levels. Both trends are here from 1959 to 2016. For an alternative, the data includes the annual closing price of the Dow Jones Industrial Average. Can it predict global temperature?", - "download": "https://dasl.datadescription.com/download/data/3116", - "filename": "Climate-change-2016", - "name": "Climate change 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Hurricane frequencies", - "download": "https://dasl.datadescription.com/download/data/3279", - "filename": "Hurricane-frequencies", - "name": "Hurricane frequencies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Hurricane history", - "download": "https://dasl.datadescription.com/download/data/3280", - "filename": "Hurricane-history", - "name": "Hurricane history", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - 
"comment_character": "#", - "create_index_column": false, - "description": "The barometric pressure at the center of a hurricane is often used to measure the strength of the hurricane because it can predict the maximum wind speed of the storm. How well is the wind speed predicted by the barometric pressure? ", - "download": "https://dasl.datadescription.com/download/data/3278", - "filename": "Hurricanes-2015", - "name": "Hurricanes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Los Angeles Almanac reports a number of variables about the weather in LA. Among them is the annual rainfall, reported here for 1991-2018. It is worthwhile to look up any outliers. ", - "download": "https://dasl.datadescription.com/download/data/3555", - "filename": "LA-rainfall", - "name": "LA rainfall", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Is global climate change leading to an increase in the number of major hurricanes? 
The data gives the number of hurricanes classified as major hurricanes in the Atlantic Ocean each year from 1944 through 2013, as reported by NOAA: ", - "download": "https://dasl.datadescription.com/download/data/3323", - "filename": "Major-hurricane-2013", - "name": "Major hurricanes 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tornadoes 2015\nSource: www.nws.noaa.gov/om/hazstats/resources/weather_fatalities.pdf", - "download": "https://dasl.datadescription.com/download/data/3488", - "filename": "Tornadoes", - "name": "Tornadoes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tracking hurricanes 2015", - "download": "https://dasl.datadescription.com/download/data/3493", - "filename": "Tracking-hurricanes-2015", - "name": "Tracking hurricanes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The National Hurricane Center (NHC) of the National Oceanic and Atmospheric\nAdministration (NOAA) tries to predict the path each hurricane will take. But hurricanes\ntend to wander around aimlessly and are pushed by fronts and other weather\nphenomena in their area, so they are notoriously difficult to predict. Even relatively small changes in a hurricane’s track can make big differences in the damage it causes. 
The data give the mean error in nautical miles of the NHC’s 72-hour predictions of Atlantic hurricanes for 1970-2017. NOAA refers to these errors as the Forecast\nerror or the Prediction error and reports annual results.", - "download": "https://dasl.datadescription.com/download/data/3494", - "filename": "Tracking-hurricanes-2016", - "name": "Tracking hurricanes 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tsunamis 2016", - "download": "https://dasl.datadescription.com/download/data/3500", - "filename": "Tsunamis-2016", - "name": "Tsunamis 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://www.ngdc.noaa.gov/hazard/tsu_db.shtml Extracted Event Validity 3 and 4 Cause Codes 1-5 Event Validity: 4 = definite tsunami 3 = probable tsunami 2 = questionable tsunami 1 = very doubtful tsunami 0 = event that only caused a seiche or disturbance in an inland river -1 = erroneous entry Cause Code: Valid values: 0 to 11 The source of the tsunami: 0 = Unknown 1 = Earthquake 2 = Questionable Earthquake 3 = Earthquake and Landslide 4 = Volcano and Earthquake 5 = Volcano, Earthquake, and Landslide 6 = Volcano 7 = Volcano and Landslide 8 = Landslide 9 = Meteorological 10 = Explosion 11 = Astronomical Tide", - "download": "https://dasl.datadescription.com/download/data/3501", - "filename": "Tsunamis-2018", - "name": "Tsunamis 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - 
"use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Weather forecasts", - "download": "https://dasl.datadescription.com/download/data/3519", - "filename": "Weather-forecasts", - "name": "Weather forecasts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Wind speed", - "download": "https://dasl.datadescription.com/download/data/3528", - "filename": "Wind-speed", - "name": "Wind speed", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch3a.dat includes the validation data collected at the stationary\nambient monitoring site. The variables are:\n\n 1. Date, in MM/DD/YY format,\n DC\n 2. 12-hour average daytime continuous ozone concentration, X ,\n 1\n DP\n 3. 12-hour average daytime passive ozone concentration, X ,\n 1\n NC\n 4. 12-hour average nighttime continuous ozone concentration, X , and\n 1\n NP\n 5. 12-hour average nighttime passive ozone concentration, X .\n 1", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch3a.dat", - "filename": "Ozone_", - "name": "Prediction Models for Personal Ozone Exposure Assessment", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch3b.dat includes the personal ozone exposure data. 
The \nvariables are:\n\n 1. Subject identification number, ranging from 1 to 23,\n\n 2. Date, in MM/DD/YY format,\n\n 3. Home region, ranging from 1 to 6,\n\n 4. 12-hour average daytime personal ozone concentration, Y,\n\n 5. 12-hour average daytime continuous ozone concentration at the\n DC\n stationary site, X ,\n 1\n\n 6. 12-hour average nighttime continuous ozone concentration at the\n NC\n stationary site, X ,\n 1\n O\n 7. 24-hour average home outdoor passive ozone concentration, X ,\n 1\n DI\n 8. 12-hour average home indoor daytime passive ozone concentration, X ,\n 1\n NI\n 9. 12-hour average home indoor nighttime passive ozone concentration, X ,\n 1\n\n 10. Prediction values for a 12-hour microenvironmental model based\n H\n on hourly ozone concentrations, X ,\n 2\n O\n 11. Fraction of time spent anywhere outdoors, X ,\n 3\n I\n 12. Fraction of time spent at home indoors, X , and\n 3\n\n 13. Indicator variable for whether the child stayed near the\n S\n home for the whole day, X , where 1 = yes, 0 = no.\n 3", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch3b.dat", - "filename": "Ozone2", - "name": "Prediction Models for Personal Ozone Exposure Assessment", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Weather" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ocean swell produces spectacular eruptions of water through a hole in the cliff at Kiama, about 120km south of Sydney, known as the Blowhole. The times at which 65 successive eruptions occurred from 1340 hours on 12 July 1998 were observed using a digital watch. \nJim Irish writes \nAnyone who has visited the Blowhole more than once knows that the rate and volume of eruptions varies. This variation occurs at several timescales. 
We might expect that part is explained by the tides, so that eruptions are more frequent and spectacular when the tide is very high, and eruptions obviously depend on the presence of a large ocean swell generated by prolonged strong winds over the ocean well offshore from Kiama. Hence, any stochastic model fitted to data observed over a short period of time is only applicable to that period, and perhaps a few hours either side of the observations. But we might infer from the model fitted to those data that a similar model applies more generally. ", - "download": "http://www.statsci.org/data/oz/kiama.txt", - "filename": "kiama", - "name": "Kiama Blowhole Eruptions", - "number_format": 31, - "remove_quotes": true, - "separator": "auto", - "simplify_whitespaces": true, - "skip_empty_parts": false, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data records the length of rivers in the South Island of New Zealand. The lengths are given in kilometres. The second variable, FlowsInto, indicates whether the river flows into the Pacific Ocean (0) or the Tasman Sea (1). A map of the island's rivers is included here.", - "download": "http://www.statsci.org/data/oz/nzrivers.txt", - "filename": "nzrivers", - "name": "Length of New Zealand Rivers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Date on the concentration of polychlorinated biphenyl (PCB) residues in a series of lake trout from Cayuga Lake, NY, were reported in Bache et al (1972). The ages of the fish were accurately known, because the fish were annually stocked as yearlings and distinctly marked as to year class. 
Each whole fish was mechanically chopped, ground, and thoroughly mixed, and 5-gram samples taken. The samples were treated and PCB residues in parts per million (ppm) were estimated using column chromatography. \nBates and Watts (1988) use a linear model \nlog(PCB) = b1 + b2 Age1/3 \nbut they remark that the nonlinear model \nlog(PCB) = b1 + b2 Ageq \nis slightly better. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of trout (years)\n\nPCB\n\nPCB concentration (ppm)\n", - "download": "http://www.statsci.org/data/general/troutpcb.txt", - "filename": "troutpcb", - "name": "PCB Concentrations in Lake Trout", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Jaffe, Parker and Wilson have investigated the concentration of several hydrophobic organic substances (such as hexachlorobenzene, chlordane, heptachlor, aldrin, dieldrin, endrin) in the Wolf River in Tennessee. Measurements were taken downstream of an abandoned dump site that had previously been used by the pesticide industry to dispose of its waste products. \nIt was expected that these hydrophic substances might have a nonhomogeneous vertical distribution in the river because of differences in density between these compounds and water and because of the adsorption of these compounds on sediments, which could lead to higher concentrations on the bottom. It is important to check this hypothesis because the standard procedure of sampling at six-tenths of the depth could miss the bulk of these pollutants if the distribution were not uniform. \nGrab samples were taken with a La Motte-Vandorn water sampler of 1 litre capacity at various depths of the river. 
This sampler consists of a horizontal plexiglas tube of 7 centimetres diameter and a plunger of each side which shuts the sampler when the sampler is at the desired depth. Ten surface, 10 mid-depth and 10 bottom samples were collected, all within a relatively short period. Until they were analysed the samples were stored in 1-quart mason jars at low temperature. \nIn the analysis of the samples, a 250-millilitre water sample was taken from each mason jar and was extracted with 1 millilitre of either hexanes or petroleum ether. A sample of the extract was then injected into a gas chromatograph and the output was compared against standards of known concentrations. The test procedure was repeated two more times, injecting different samples of the extract in the gas chromatograph. The average aldrin and hexachlorobenzene (HCB) concentrations (in nanograms per liter) in these 30 samples are given in the data.", - "download": "http://www.statsci.org/data/general/wolfrive.txt", - "filename": "wolfrive", - "name": "Wolf River Pollution", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The following data from the Statistical Abstract of the United States give the number of accidental oil spills at sea and the amount of oil lost in these spills for the years 1973 - 1985. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nYear\n\nYear\n\nSpills\n\nNumber of spills\n\nOil\n\nAmount of oil lost (thousands of metric tonnes)\n\n\n\n", - "download": "http://www.statsci.org/data/general/spills.txt", - "filename": "spills", - "name": "Accidental Oil Spills", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data refer to a survey of the fauna on the sea bed lying between the coast of northern Queensland and the Great Barrier Reef. The sampling region covered a zone which was closed to commercial fishing, as well as neighbouring zones where fishing was permitted. In view of the large numbers and types of species captured in the survey the catch was summarized as a score, on a log weight scale, which combines information across species. Two such scores are available. The details of the survey, and a full analysis of the data, are in Poiner et al (1997). \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nZone\n\nan indicator for the closed (1) and open (0) zones \n\nYear\n\nan indicator of 1992 (0) or 1993 (1) \n\nLatitude\n\nlatitude of the sampling position \n\nLongitude\n\nlongitude of the sampling position \n\nDepth\n\nbottom depth \n\nScore1\n\ncatch score 1 \n\nScore2\n\ncatch score 2 \n", - "download": "http://www.statsci.org/data/oz/reef.txt", - "filename": "reef", - "name": "Prawn Trawling in the Great Barrier Reef", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The annual number of deaths from floods in the United states from 1995 through 2015. 
Years are not provided, but the data values are in time order.", - "download": "https://dasl.datadescription.com/download/data/3211", - "filename": "Floods-2015", - "name": "Floods 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Climate scientists have been observing the extent of sea ice using satellite observations. Many have expressed concern because, since 1980, the extent of sea ice has declined precipitously—possibly due to global climate change. But a multiple regression of Extent on temp and year gives a coefficient for temp that is essentially zero. ", - "download": "https://dasl.datadescription.com/download/data/3443", - "filename": "Sea-ice", - "name": "Sea ice", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "As part of the course work, a class at an upstate\nNY college collects data on streams each year. 
Students\nrecord a number of biological, chemical, and physical variables,\nincluding the stream name, the substrate of the stream\n(limestone (L), shale (S), or mixed (M)), the pH, the temperature\n(\u001dC), and the BCI, a measure of biological diversity.", - "download": "https://dasl.datadescription.com/download/data/3463", - "filename": "Streams", - "name": "Streams", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch2.dat contains the following variables:\n\n animal - a unique identifier associated with each C. dubia tested\n conc - concentration (micro grams/L)\n brood1 - number of young produced in the first brood\n brood2 - number of young produced in the second brood\n brood3 - number of young produced in the third brood\n total - sum of young produced in the 3 broods (=brood1 + brood2 + brood3)", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch2.dat", - "filename": "Pollutants", - "name": "Assessing Toxicity of Pollutants in Aquatic Systems ", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For a selection of months in the period 1970 to 1983, a measurement of the\nocean salinity at a depth of 100 meters off the Alaskan coast, given in parts\nper thousand. Columns are:\n\n 1. year\n 2. month\n 3. 
salinity", - "download": "http://lib.stat.cmu.edu/crab/salinity", - "filename": "salinity-2", - "name": "ocean salinity", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For a selection of months in the period 1970 to 1983, a measurement of the\nocean temperature at a depth of 100 meters off the Alaskan coast, given in\ndegrees Celsius. Columns are:\n\n 1. year\n 2. month\n 3. temperature", - "download": "http://lib.stat.cmu.edu/crab/celsius", - "filename": "celsius", - "name": "ocean temperature", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Waters" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Groups of dolphins were observed off the coast of Iceland near Keflavik in 1998. The data here give the time of the day and the main activity of the group, whether travelling quickly, feeding or socializing. The dolphin groups varied in size - usually feeding or socializing groups were larger than travelling groups. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nActivity\n\nMain activity of group: travelling (Travel), feeding (Feed) or socializing (Social)\n\nPeriod\n\nTime of the day: Morning, Noon, Afternoon or Evening\n\nGroups\n\nNumber of groups observed\n\n\n\n", - "download": "http://www.statsci.org/data/general/dolpacti.txt", - "filename": "dolpacti", - "name": "Activities of Dolphin Groups", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Cairns (1988) analysed the relation between population and foraging area for seabird colonies. The following table presents their data for 22 black-legged kittiwake (a northern gull) colonies of Scotland's Shetland and Orkney Islands. Area is km2 and Population is the number of breeding pairs. ", - "download": "http://www.statsci.org/data/general/kittiwak.txt", - "filename": "kittiwak", - "name": "Kittiwake Colonies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Some handicapped people have access to trained monkey helpers that can perform household tasks like switching things on and off. This data set gives the number of tasks each of nine monkeys can perform along with the number of years the monkeys have been working with handicapped people. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName \n\nName of monkey\n\nYears \n\nNumber of years the monkey has worked with handicapped people\n\nTasks \n\nNumber of tasks the monkey can perform\n", - "download": "http://www.statsci.org/data/general/monkeys.txt", - "filename": "monkeys", - "name": "Trained Monkeys", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Includes brain and body weight, life span, gestation time, time sleeping, and predation and danger indices for 62 species of mammals. Of interest is to predict the time spent sleeping and the proportion of sleep time in dream sleep. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nBodyWt\n\nbody weight (kg)\n\nBrainWt\n\nbrain weight (g)\n\nNonDreaming\n\nslow wave (\"nondreaming\") sleep (hrs/day)\n\nDreaming\n\nparadoxical (\"dreaming\") sleep (hrs/day)\n\nTotalSleep\n\ntotal sleep, sum of slow wave and paradoxical sleep (hrs/day)\n\nLifeSpan\n\nmaximum life span (years)\n\nGestation\n\ngestation time (days)\n\nPredation\n\npredation index (1-5)\n1 = minimum (least likely to be preyed upon); 5 = maximum (most likely to be preyed upon)\n\nExposure\n\nsleep exposure index (1-5)\n1 = least exposed (e.g. 
animal sleeps in a well-protected den); 5 = most exposed\n\nDanger\n\noverall danger index (1-5) (based on the above two indices and other information)\n1 = least danger (from other animals); 5 = most danger (from other animals)\n\n\n\n", - "download": "http://www.statsci.org/data/general/sleep.txt", - "filename": "sleep_", - "name": "Sleep in Mammals", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Results of horse races at Eagle Farm, Brisbane, on 31 August 1998. The data, collected by Donald Forbes for his MS305 Data Analysis Project, give results for each horse in a sequence of 8 races. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPosition\n\nFinishing position\n\nStarters\n\nNumber of horses in race\n\nLast\n\nFinishing position in last race\n\nSince\n\nDays since last race\n\nNumber\n\nIdentifying number of horse in race\n\nCarried\n\nWeight carried\n\nWeight\n\nHandicap weight\n\nBarrier\n\nBarrier position at start of race\n\nDistance\n\nLength of race\n\nLengths\n\nNumber of lengths that horse finished from winner\n\nOdds\n\nStarting odds\n\nStarts\n\nNumber of races previously started in\n\nAge\n\nAge of horse in years\n\nRatio\n\nProportion of wins in previous starts\n", - "download": "http://www.statsci.org/data/oz/horses.txt", - "filename": "horses", - "name": "Horse Racing at Eagle Farm", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the survival times (in 10 hour units) in a 3 x 4 factorial experiment, the factors being (a) three poisons and (b) four 
treatments. Each combination of the two factors is used for four animals, the allocation to animals being completely randomized. \n", - "download": "http://www.statsci.org/data/general/poison.txt", - "filename": "poison", - "name": "Poison Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data was collected by Peter Drew and Matt Seidemann, statistics students at the Queensland University of Technology, in a subject taught by Dr Margaret Mackisack. Here is their description of the data and its collection: \n\"As keen fishermen out and about on a fairly regular basis, the common arguments arise between anglers on the best rigging set up for various conditions. We decided that upon our next group outing that we would back up our opinions with hard statistical facts. Our interest led us to test the most obvious variables in the fishing rig. \n\"Of interest were firstly the rod length, as between fisherman there always tends to be a variety of rods of different sizes; secondly the type of line, in that the larger the line it would be logical that the weight would increase; thirdly the sinker weight and how it affected the casting distance. \n\"In deciding on the three variables a 2^3 factorial design seemed obvious and for our purposes seemed to be quite adequate. So the question was placed as to whether or not the above variables in any combination made any difference to the overall distance the line was cast. The rods used were 6ft and 7ft two piece boat rods, fitted with the same type of spinning reel. The variable sinkers were 8oz and 12oz round ball sinkers and the line used was either the 1kg or 2kg line of the same make. 
\n\"The experiment was carried out on a day that was close to windless thus lowering the relative influence of the wind. The series of casts was conducted by the same person as were the measurements thus giving uniformity to the total experiment. A break of five minutes was timed between casts so as to allow the caster to allocate the same amount of energy to each cast. The rods were not rigged by the caster; a rigger would set the rod up with a combination of sinker, line and rod, and an effort was made to keep the caster oblivious to the changes in the rig. \n\"The experiment was conducted on the rugby ovals on Oleria St, Brookside (a western surburb of Brisbane) adjacent to the RSL (Returned Serviceman League club), which for all intents and purposes would be classified as a level surface. A line was placed at one end of the field and from it the caster would cast the rod as he would given normal fishing conditions. A spotter who was also the measurer would mark the point of impact of the sinker and from it measure back to the line from which it was cast. The distance observed was subsequently rounded up to the nearest 0.5 of a metre. Two runs were made of each combination. \n\"Possible improvements: Because of the time the rigging took, both casts with each rig were done at the same time. If we did it again it would be better to use random numbers to decide the order of all sixteen casts.\" ", - "download": "http://www.statsci.org/data/oz/fishing.txt", - "filename": "fishing_", - "name": "Fishing Rod Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Four male and four female turtles had their plasma protein measured while they were well fed and after ten and twenty days of fasting. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-8\n\nSex\n\nMale or Female\n\nFed\n\nPlasma protein while well fed (mg/ml)\n\nFasted10\n\nPlasma protein after fasting 10 days\n\nFasted20\n\nPlasma protein after fasting 20 days\n", - "download": "http://www.statsci.org/data/general/turtles.txt", - "filename": "turtles", - "name": "Plasma Protein of Fasting Turtles", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Frogs of four species had their oxygen consumption measured at two temperatures and two exercise levels. There were two frogs of each species at each temperature, and each of the two was measured both at rest and during forced exercise. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-16\n\nSpecies\n\n1-4\n\nTemperature\n\nLow or High\n\nRest\n\nOxygen consumption (ml O2/g/hr) at rest\n\nExercise\n\nOxygen consumption during exercise\n\n\n\n\n", - "download": "http://www.statsci.org/data/general/frogs.txt", - "filename": "frogs_", - "name": "Oxygen Consumption of Frogs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the age and the length of dugongs Dugong dugon (M�ller) captured near Townsville in north Queensland, Australia. The lifespan of a dugong is 50-60 years.\nThese data were working estimates. In particular the method of determining the age of dugong has changed somewhat since the data were recorded. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge in years\n\nLength\n\nLength in metres\n\n\n\n", - "download": "http://www.statsci.org/data/oz/dugongs.txt", - "filename": "dugongs", - "name": "Age and Length of Dugongs near Townsville", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the sound pressure of sonar signals (\"clicks\") from a dolphin at various ranges to target. The measurements were made off the coast of Iceland near Keflavik in 1998. The pressure measurement given is \nraw pressure + a Range \nwhere a is a known constant depending on the water density. Pressure is expected to increase with distance even after the adjustment. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRange \n\nDistance to dolphin in metres\n\nSoundPressure \n\nWater sound pressure adjusted for water density\n", - "download": "http://www.statsci.org/data/general/dolphin.txt", - "filename": "dolphin", - "name": "Sound Pressure of Dolphin Sonar", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The observed responses are Geiger counter counts (times 10-4) used to measure the amount of radioactively tagged sulfate drug in the blood of a baboon named Brunhilda after an injection of the drug. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nTime in hours since injection\n\nSulfate\n\nGeiger counter counts � 10-4\n", - "download": "http://www.statsci.org/data/general/brunhild.txt", - "filename": "brunhild", - "name": "Blood Sulfate in a Baboon Named Brunhilda", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The European rabbit Oryctolagus cuniculus is a major pest in Australia. A reliable method of age determination for rabbits caught in the wild would be of importance in ecological studies. In this study, the dry weight of the eye lens was measured for 71 free-living wild rabbits of known age. Eye lens weight tends to vary much less with environmental conditions than does total body weight, and therefore may be a much better indicator of age \nThe rabbits were born and lived free in an experimental 1.7 acre enclosure at Gungahlin, ACT. The birth data and history of each individual were accurately known. Rabbits in the enclosure depended on the natural food supply. In this experiment, 18 of the eye lenses were collected from rabbits that died in the course of the study from various causes such as coccidiosis, bird predation or starvation. The remaining 53 rabbits were deliberately killed, immediately after being caught in the enclosure or after they had been kept for some time in cages. The lenses were preserved and their dry weight determined. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of rabbit in days\n\nLens\n\nDry weight of eye lens in milligrams\n", - "download": "http://www.statsci.org/data/oz/rabbit.txt", - "filename": "rabbit", - "name": "Age and Eye Lens Weight for Rabbits in Australia", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Insects were exposed to gaseous carbon disulphide for a period of 5 hours. Eight experiments were run with different concentrations of carbon disulphide. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDose\n\nDose of carbon disulphide\n\nExposed\n\nNumber of beetles exposed\n\nMortality\n\nNumber of beetles killed\n", - "download": "http://www.statsci.org/data/general/beetles.txt", - "filename": "beetles", - "name": "Beetle Mortality", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Bill Venables writes: \nGroups of 20 snails were held for periods of 1, 2, 3 or 4 weeks in carefully controlled conditions of temperature and relative humidity. There were two species of snail, A and B, and the experiment was designed as a 4 by 3 by 4 by 2 completely randomized design. At the end of the exposure time the snails were tested to see if they had survived; the process itself is fatal for the animals. The object of the exercise was to model the probability of survival in terms of the stimulus variables, and in particular to test for differences between species. The data are unusual in that in most cases fatalities during the experiment were fairly small. 
\nSpecies\n \nSnail species A or B \nExposure\n \nExposure in weeks (4 levels) \nHumidity\n \nRelative humidity (4 levels) \nTemp\n \nTemperature in degrees Celsius (3 levels) \nDeaths\n \nNumber of deaths \nN \n \nNumber of snails exposed \n", - "download": "http://www.statsci.org/data/oz/snails.txt", - "filename": "snails_", - "name": "Snail Mortality", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Activity of individually caged fiddler crabs under constant conditions for 225 consecutive hours (225 = 9*25 = 9*24 + 8). The activity scale is log(y+1) where y is mean minutes per hour. Examination of the data suggests that the logarithm was base 10. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nActivity\n\nlog(Minutes per hour+1)\n", - "download": "http://www.statsci.org/data/general/fiddler.txt", - "filename": "fiddler", - "name": "Activity of Fiddler Crabs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tidal shrimps from the Brisbane River move up and down the tidal area (harbour pylon for example) in accordance with the movement of the tides. In this experiment shrimps were removed from their natural environment and isolated from environmental stimulae which would allow them to measure time. Their vertical position on an inclined slope was recorded every half hour starting 20 hours after removal and continuing for one week. Also recorded is the actual tide height during the same period, and six other measures of the shrimps' activity. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime \n\nHours since isolation\n\nVertical \n\nVertical displacement from original position\n\nY2 - Y7 \n\nOther activity measurements\n\nTide \n\nActual tide height\n", - "download": "http://www.statsci.org/data/oz/shrimp.txt", - "filename": "shrimp_", - "name": "Movement of Tidal Shrimps in Isolation", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Monthly total number of pigs slaughtered in Victoria, from January 1980 to August 1995.", - "download": "http://www.statsci.org/data/oz/pigs.txt", - "filename": "pigs", - "name": "Pigs Slaughtered in Victoria", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Seed predators and herbivores can operate as strong selective agents in the evolution of plant defence. In this context, Delpino (1886) posed the \"ant-guard\" hypothesis to explain the role of extrafloral nectaries on plants. Extrafloral nectaries (EFN), distributed on species in over 80 plant families, occur on vegetative organs and \"outer floral parts\" not directly associated with pollination. Basically, the hypothesis states that extrafloral nectar production attracts pugnacious \"bodyguards\" (usually ants) which by their foraging activities deter the activities of herbivorous insects and seed predators. \nSince its inception, the ant-guard hypothesis has remained controversial. 
A few careful studies have experimentally demonstrated that ants attending EFN protect plants (von Wettstein, 1889; Inouye and Taylor, 1979; Schemske, 1980) while several recent studies showed no effect (O’Dowd and Catchpole, 1983; Tempel, 1983; Boecklen, 1984). O’Dowd and Catchpole (1983), for example, found that attendance of ants at EFN deterred other insects from developing flowerheads but that their presence decreased neither the numbers of seed predators nor damage to developing flowerheads. The object of this paper is to describe the ant-insect interactions by means of a simple probability model. \nFull experimental detail is provided by O'’Dowd and Catchpole (1983) but an outline is as follows. The plants studied were helichrysum bracteatum. Three sites were chosen in clearings in the Tallaganda State forest, 40 km. southeast of Canberra, and at each site ten pairs of plants were studied. Plants within each pair were of similar initial size and less than 1 metre apart. Within each pair, ants were excluded from one plant, while the other served as a control. The plants were censused once a week for 17 weeks over the reproductive season (from initiation of flowerheads through the postflowering phase). The data recorded for each plant included the number of flowerheads (capitula), the number of capitula with ants, and the total number of other insects. Different species of ants (predominantly Iridomyrmex spp.) and other insects were observed, but in the data here are pooled within each general category. 
\nTo clarify: the first column (Week) lists the week the observation was made, the second (Index) lists the index given to the pair of plants observed, the third (AntCap) is the number of capitula on the plant with ant access, the fourth (ExcCap) is the number of capitula on the plant excluded from ant access, the fifth (Ants) is the number of capitula that have ants present on them, the sixth column (AntIns) is the number of insects on the plant with ant access, and the seventh (ExcIns) is the number of insects on the plant excluded from ant access. Index number 1-10 refer to Site 1, 11-20 to Site 2 and 21-30 to Site 3.", - "download": "http://www.statsci.org/data/oz/ants.txt", - "filename": "ants", - "name": "Ant-Insect Interactions on Flowerheads", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A new type of heart valve has been developed and is implanted in 63 dogs that have been raised on various levels of exercise. The numbers of valve transplants that succeed are recorded. Is the proportion of successful implants the same for dogs on all exercise regimens? Is there a trend with amount of exercise in the proportion of successful implants? 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nExercise\n\nAmount of exercise: 1=None, 2=Slight, 3=Moderate, 4=Vigorous\n\nImplant\n\n1=Successful, 2=Unsuccessful\n\nFrequency\n\nNumber of dogs\n\n\n\n", - "download": "http://www.statsci.org/data/general/exervalv.txt", - "filename": "exervalv", - "name": "Heart Valves in Dogs on Different Exercise Regimens", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give growth measurements on Tammar wallabies (Macropus eugenii). Each line is a set of measurements on an animal at a particular time. Most lengths are in tenths of millimetres. The data from some animals is very fragmentary. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nAnim\n\nAnimal number\n\nSex\n\n1=male, 2=female\n\nLoca\n\nLocation of animal\n\nLeng\n\nLength of animal (tenths of a millimetre)\n\nHead\n\nHead length\n\nEar\n\nEar Length\n\nArm\n\nArm length\n\nLeg\n\nLeg length\n\nPres\n\nPes (foot) length\n\nTail\n\nTail length\n\nWeight\n\nWeight (tenths of a gram)\n\nAge\n\nAge in days from birth\n", - "download": "http://www.statsci.org/data/oz/wallaby.txt", - "filename": "wallaby", - "name": "Dryandra Tammar WallabyGrowth of Tammar Wallabies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Following the Second World War, D. L. Serventy carried out a detailed study of the lifecycle of the Tasmanian muttonbird (Puffinus tenuirostris, often called the short-tailed shearwater). The data here concerns the growth pattern of fledgling birds of this species. 
\nAfter the eggs hatch, the parent birds spend much time away from the next, and with increasing time their returns become rarer and rarer. When they return the young bird feed copiously, and there is very rapid weight-gain; whilst they are absent, the offspring loses weight. The result is not a smooth growth curve such as one finds in most measurements in developing animals and birds, but a 'sawtooth' effect. The data were collected in 1954 as weighings each morning of two fledgling chicks on Fisher Island, Bass Strait, and each set terminates on the day the chick left the nest. \nMuch of the interest in these curves comes not from the description they give of the weight of the chick, but from the information they contain on the feeding patterns of the parents. There are three obvious features of the data; the timing of the feeds and the size of the feeds when they occur, both of which represent aspects of the feeding pattern of the parents; and the loss in weight of the chicks between feeds. Henstridge and Tweedie (1984) proposed a model, similar to those used in storage theory, which describes each of these phenomena separately.", - "download": "http://www.statsci.org/data/oz/muttonbi.txt", - "filename": "muttonbi", - "name": "Growth of Tasmanian Muttonbirds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Dimensions in millimetres are given of two samples of jellyfish from Hawkesbury River in New South Wales, Australia. One of the samples came from Dangar Island and the other from Salamander Bay. The first column contains a \"D\" if the measurement came from Dangar Island and a \"S\" if it came from Salamander Bay. The dimensions measured were length and width. 
What can one learn from graphing the two principal components? Try graphing principal components of the logarithms of the measurements. Can the dimensions determine the location?", - "download": "http://www.statsci.org/data/oz/jellfish.txt", - "filename": "jellfish", - "name": "Dimensions of Jellyfish", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study was conducted concerning the counts of lesions produced on membranes of chick embryos by viruses of the pox group. The data give the numbers of lesions formed at a series of dilutions of the viral medium. \n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDilution\n\nDilution of viral medium, from 1 to 32\n\nCount\n\nNumber of lesions\n", - "download": "http://www.statsci.org/data/general/pocklesi.txt", - "filename": "pocklesi", - "name": "Pock Lesions on Chick Embryos", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This data comes from an experiment on induction of flowering of cyclamen. Plants of 4 varieties of cyclamen were subject to a combination of 6 temperature regimens and 4 levels of fertilization. The temperature regimens are combinations of five temperatures during the day (14, 16, 18, 20 and 26 degrees C) and four temperatures during the night (14, 16, 18 and 20 C). Not all the combinations of temperatures are present. The response is the number of flowers, which vary from 4 to 26, with mode 8. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nVariety\n\nVariety of cyclamen\n\nRegimem\n\nTemperature regimen (combination of the temperature during the day and the temperature during the night)\n\nDay\n\nTemperature during the day (Centigrade)\n\nNight\n\nTemperature during the night\n\nFertilizer\n\nLevel of fertilization\n\nFlowers\n\nNumber of flowers\n", - "download": "http://www.statsci.org/data/general/cyclamen.txt", - "filename": "cyclamen", - "name": "Number of Cyclamen Flowers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In an experiment where pregnant mice were exposed to the herbicide 2,4,5-T (the active component in Agent Orange), the number of fetal implants in utero were recorded. The data give the frequency distribution of implants at each of seven dose levels measured in mg/kg of body weight. \nOn days 6-14 after mating, pregnant dams were dosed by gavage with one of the doses of 2,4,5-T. Prior to giving birth, the dams were sacrificed and the number of viable, dead and reabsorbed foetuses in the uterus of the dam were determined. The data here gives the number of surviving viable implants. An outcome of zero implants cannot be distinguished from a non-pregnant outcome so any zero implant outcomes were excluded. 
\n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDose\n\nDose of 2,4,5-T in mg/kg/day\n\nImplants\n\nNumber of surviving implants\n\nFrequency\n\nNumber of mice with that number of implants\n", - "download": "http://www.statsci.org/data/general/fetaimpl.txt", - "filename": "fetaimpl", - "name": "Fetal Implants in Mice Utero", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3074", - "filename": "Bird-Species-2013", - "name": "Bird-Species-2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ranges inhabited by the Indian gharial\ncrocodile and the Australian saltwater crocodile overlap in\nBangladesh. Suppose a very large crocodile skeleton is found\nthere, and we wish to determine the species of the animal.\nWildlife scientists have measured the lengths of the heads\nand the complete bodies of several crocs (in centimeters) of\neach species.\n", - "download": "https://dasl.datadescription.com/download/data/3147", - "filename": "Crocodile-lengths", - "name": "Crocodile lengths", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 2004, a team of researchers published a study of contaminants in farmed salmon. Fish from many sources were analyzed for 14 organic contaminants. 
The study\nexpressed concerns about the level of contaminants found. One of those was the\ninsecticide mirex, which has been shown to be carcinogenic and is suspected to be\ntoxic to the liver, kidneys, and endocrine system. The dataset holds 153 observed salmon samples and reports concentrations of a number of contaminant.", - "download": "https://dasl.datadescription.com/download/data/3199", - "filename": "Farmed-salmon", - "name": "Farmed salmon", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Wildlife researchers monitor many wildlife populations by taking aerial photographs. Can they estimate the weights of alligators accurately from the air? Here are data on the Weight of alligators (in pounds) and their Length (in inches). ", - "download": "https://dasl.datadescription.com/download/data/3236", - "filename": "Gators", - "name": "Gators", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Maine lobster fishing industry is carefully controlled and licensed, and facts about it have been recorded for more than a century, so it is an important industry that we can examine in detail. 
The dataset holds annual data ", - "download": "https://dasl.datadescription.com/download/data/3317", - "filename": "Lobsters-2016", - "name": "Lobsters 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Manatees are gentle mammals that live in the waters off the coast of Florida and a few other places. Unfortunately, many are killed each year in collisions with powerboats. Marine biologists warn that the growing number of powerboats registered in Florida threatens the existence of manatees. The data here are the number of manatees killed each year since 1982 and the number of powerboats registered in Florida (in thousands) for those years. Is there a relationship?", - "download": "https://dasl.datadescription.com/download/data/3325", - "filename": "Manatees-2015", - "name": "Manatees 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Psychology experiments sometimes involve testing the\nability of rats to navigate mazes. The mazes are classified\naccording to difficulty, as measured by the mean length of\ntime it takes rats to find the food at the end. One researcher\nneeded a maze that will take rats an average of about one minute\nto solve. 
He tested one maze on several rats, collecting the\ndata provided.", - "download": "https://dasl.datadescription.com/download/data/3333", - "filename": "Maze", - "name": "Maze", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Can pleasant smells improve learning? Researchers timed 21 subjects as they tried to complete paper-and-pencil mazes. Each subject attempted a maze both with and without the presence of a floral aroma. Subjects were randomized with respect to whether they did the scented trial first or second. Is there any evidence that the floral scent improved the subjects’ ability to complete the mazes?", - "download": "https://dasl.datadescription.com/download/data/3334", - "filename": "Mazes-smells", - "name": "Mazes and smells", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Emperor penguins are the most accomplished divers among birds, making routine\ndives of 5–12 minutes, with the longest recorded dive over 27 minutes. These\nbirds can also dive to depths of over 500 meters! 
Since air-breathing animals like\npenguins must hold their breath while submerged, the duration of any given dive\ndepends on how much oxygen is in the bird’s body at the beginning of the dive, how\nquickly that oxygen gets used, and the lowest level of oxygen the bird can tolerate.\nThe rate of oxygen depletion is primarily determined by the penguin’s heart rate.\nConsequently, studies of heart rates during dives can help us understand how these\nanimals regulate their oxygen consumption in order to make such impressive dives.The researchers equipped emperor penguins with devices that record their heart rates during\ndives. The dataset reports Dive Heart Rate (beats per minute), the Duration\n(minutes) of dives, and other related variables.", - "download": "https://dasl.datadescription.com/download/data/3391", - "filename": "Penguins", - "name": "Penguins", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Salmon", - "download": "https://dasl.datadescription.com/download/data/3435", - "filename": "Salmon", - "name": "Salmon", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The number of storks in Oldenburg, Germany, plotted against the population of the town for 7 years in the 1930s. Do storks bring babies? 
", - "download": "https://dasl.datadescription.com/download/data/3462", - "filename": "Storks", - "name": "Storks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Large herds of wild horses can become a problem on some federal lands in the West. Researchers hoping to improve the management of these herds collected data to see if they could predict the number of foals that would be born based on the size of the current herd. ", - "download": "https://dasl.datadescription.com/download/data/3524", - "filename": "Wild-horses", - "name": "Wild horses", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset consists of a few variables that may influence the demand for Beef in the United States. It provides an example of the influence of inflation in monetary time series data as well as providing some interesting statistical features in building demand models in regression.", - "download": "http://jse.amstat.org/v22n1/kopcso/BeefDemand.txt", - "filename": "BeefDemand", - "name": "Beef Demand", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "159 fishes of 7 species are caught and measured. Altogether there are\n8 variables. 
All the fishes are caught from the same lake\n(Laengelmavesi) near Tampere in Finland.", - "download": "http://jse.amstat.org/datasets/fishcatch.dat.txt", - "filename": "fishcatch", - "name": "fishcatch", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A cost of increased reproduction in terms of reduced longevity has been\nshown for female fruitflies, but not for males. The flies used were an\noutbred stock. Sexual activity was manipulated by supplying individual\nmales with one or eight receptive virgin females per day. The\nlongevity of these males was compared with that of two control types.\nThe first control consisted of two sets of individual males kept with\none or eight newly inseminated females. Newly inseminated females will\nnot usually remate for at least two days, and thus served as a control\nfor any effect of competition with the male for food or space. The\nsecond control was a set of individual males kept with no females.\nThere were 25 males in each of the five groups, which were treated\nidentically in number of anaesthetizations (using CO2) and provision of\nfresh food medium.", - "download": "http://jse.amstat.org/datasets/fruitfly.dat.txt", - "filename": "fruitfly", - "name": "Sexual activity and the lifespan of male fruitflies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch4a.dat contains the burlap data, with the following variables:\n\n1. mburlap = mean burlap count value obtained over 12 subplot values.\n\n2. 
megg = mean egg mass density per acre obtained over 21 subplot values.\n\n3. seegg = estimated standard error of mean egg mass density obtained\nover 21 subplot values.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch4a.dat", - "filename": "Gypsy-Moth", - "name": "Measurement Error Models for Gypsy Moth Studies", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch4b.dat contains the defoliation data, with the following variables:\n\n1. mdef = mean defoliation value obtained from 20 subplot values.\n\n2. sedef = estimated standard error of mean defoliation\nobtained from 20 subplot values.\n\n3. megg = mean estimated egg mass density obtained over 20 subplots\n\n4. seegg = estimated standard error or mean egg \nmass density obtained from 20 subplot values.\n\n5. cdefegg = estimated covariance of mean defoliation and mean egg mass\ndensity obtained from 20 subplot values.\n", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch4b.dat", - "filename": "Gypsy-Moth2", - "name": "Measurement Error Models for Gypsy Moth Studies", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch7.dat contains the following variables:\n\nNo - observation number (1,...,294).\nTIME - survival time of halibut (time until death) in hours.\n (NOTE the Table 1 in the book claims survival time is in minutes,\n but HOURS is the correct unit)\nCENSOR - censoring indicator. 
1=uncensored observation;\n 0=censored observation.\nTOWD - duration (in minutes) of time trawl net was towed on the bottom.\nDELDEPTH - difference between maximum and minimum depth observed during tow\n (depth measured in meters).\nLENGTH - fork length of halibut in centimeters.\nHANDTIME - handling time (in minutes) between net coming on board vessel \n and fish being placed in holding tanks.\nLOGCAT - natural logarithm of total catch of fish in tow.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch7.dat", - "filename": "Atlantic-Halibut", - "name": "Survival Analysis for Size Regulation of Atlantic Halibut", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch9.dat contains the following variables:\n\nBIRD : Bird id. \nRX : 1=NT, 2=PT, 3=FT, standing for \"No Tape\" (NT), in which no visible\n guides connected light cues\n with the feeders below them; \"Partial Tape\" (PT), in which fluorescent\n orange Dymo type provided a discontinuous (i.e., broken in two places) \n connection between each light cue and its feeder; and \"Full Tape\"\n (FT), in which the visible guide between each light cue and\n its feeder (fluorescent orange Dymo tape) was continuous.\n Feeding continued for 180 trials.\nGENDER : 0=male, 1=female. 
\nOUTCOME: 0=failure 1= success.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch9.dat", - "filename": "Hummingbirds", - "name": "Spatial Association Learning in Hummingbirds\n", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch10.dat contains eight variables, with 30 cases for each.\nEach case refers to a site in the forest. The first variable,\n'random', is a character variable indicating whether the site is a\nspotted owl nest site (=N) or a site selected at random\ncoordinates (=R). Variables 2-8 contain the percents of mature forest\n(>80 years of age). The variable names indicate the outer radii of the\nrings in which the percents were calculated. They are: 0.91km,\n1.18km, 1.40km, 1.60km, 1.77km, 2.41km, and 3.38km. So, for example,\nthe variable '1.18km' contains the percents of mature forest in\nrings with outer radius 1.18km and inner radius .91km centered at \nthe different sites.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch10.dat", - "filename": "Habitat-Association", - "name": "Habitat Association Studies of the Northern Spotted\nOwl, Field Grouse, and Flammulated Owl\n", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch11a.dat contains a body temperature time series for an\nadult female beaver (Castor canadensis) obtained December 12-13, 1990 \nat Sandhill Wildlife Area, Wisconsin. Observations were made at 10\nminute intervals. 
These observations follow a random pattern of\nfluctuations, typically observed during freeze-up for all beaver in\nthis study. \n\nVariable List:\n\nObservation No.\nJulian day\nTime\nBody temperature (degrees C) \nActivity (0 = animal inside retreat; 1 = animal outside retreat) \n", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch11a.dat", - "filename": "Beaver-Body-Temperatures", - "name": "Time-Series Analyses of Beaver Body Temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch11a.dat contains a body temperature time series for\na subadult female beaver (Castor\ncanadensis). Observations were made at Sandhill Wildlife Area,\nWisconsin, November 3-4, 1990 (before freeze-up). Temperature\nobservations follow a plateau pattern, typically observed during\nthe entire ice-free period (late spring to late autumn). Only the\nfirst 100 observations are included in this data set.\n\nVariable list:\n\nObservation number\nJulian day\nTime\nBody temperature (degrees C)\nActivity (0 = animal inside retreat; 1 = animal outside retreat)", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch11b.dat", - "filename": "Beaver-Body-Temperatures2", - "name": "\nTime-Series Analyses of Beaver Body Temperatures\n", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The main data set consists of king crab pot survey data for the years 1973\nthrough 1986. 
The surveys were conducted in the waters around Kodiak Island,\nAlaska, using pots similar to the pots used by the commercial fishing fleet.\n(A crab pot is a trap that resembles a wooden crate.) A fixed sampling grid\nwas used to place strings of pots (stations) consisting usually of 10 pots in\nopen ocean, or of 2-5 pots in bays. The pots were left in the water for\nperiods of 16-24 hours, removed, and the crab counts recorded. The survey was\nconducted each summer, 2-4 weeks prior to start of the commercial fishing\nseason. The crab counts are classified by size (roughly representing age) and\nsex into six categories.\n\nThe basic survey data is a file \"survey\", containing a 3,450 by 14 matrix\nwith these columns:\n\n 1. Year (last two digits)\n 2. Fishing district (one of four)\n 3. Station identifier (alphabetic)\n 4. The number of pots fished\n 5-6. Latitude and longitude of the location halfway between\n the first and last pot of the station\n 7. Number of pre-recruit-4 crab\n 8. Number of pre-recruit-3 crab\n 9. Number of pre-recruit-2 crab\n 10. Number of pre-recruit-1 crab\n 11. Number of recruit males\n 12. Number of post-recruit males\n 13. Number of juvenile females\n 14. Number of adult females", - "download": "http://lib.stat.cmu.edu/crab/survey", - "filename": "survey_", - "name": "Survey", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "==================== Contents of file \"dstns\" ============================\n \nFor each of the years in the survey (1973 to 1986), a frequency distribution\nof the crab by size (in 1 mm increments) that were surveyed. Separate\ndistributions are given for juvenile females, adult females, and all males.\nThe five columns are:\n\n 1. year\n 2. length in mm\n 3. 
count of juvenile females\n 4. count of adult females\n 5. count of all males", - "download": "http://lib.stat.cmu.edu/crab/dstns", - "filename": "dstns", - "name": "dstns", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For each of the 14 years in the survey (1973-86), an estimate of the number of\neggs per female. Columns are:\n\n 1. year\n 2. estimated eggs per adult female", - "download": "http://lib.stat.cmu.edu/crab/eggs", - "filename": "eggs", - "name": "eggs per female", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For each year in the survey, a frequency distribution of all females\ncross-classified by size (in 1 mm increments) and percent clutch fullness (5\ncategories). Clutch fullness is, roughly, the realized egg-bearing potential\nof a female crab. The seven columns are:\n\n 1. year\n 2. size, in mm\n 3. count of females with 0% fullness\n 4. count of females with 1-29% fullness\n 5. count of females with 30-59% fullness\n 6. count of females with 60-89% fullness\n 7. 
count of females with 90-100% fullness", - "download": "http://lib.stat.cmu.edu/crab/fullness", - "filename": "fullness", - "name": "Clutch fullness", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Animals" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Risk and Sammarco (1991) found that the density of the Great Barrier Reef coral Porites lobata increases with distance from the Australian shore, due to differences between inshore and offshore environments. They made three measurements at each of nine reefs at various distances from the shore. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nReef\n\nName of reef\n\nDistance\n\nDistance to shore (km)\n\nDensity\n\nCoral head density (g/cm3)\n", - "download": "http://www.statsci.org/data/oz/coralden.txt", - "filename": "coralden", - "name": "Density of Great Barrier Reef Coral Heads", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the volume (cubic feet), height (feet) and diameter (inches) (at 54 inches above ground) for a sample of 31 black cherry trees in the Allegheny National Forest, Pennsylvania. The data were collected in order to find an estimate for the volume of a tree (and therefore the timber yield), given its height and diameter. 
", - "download": "http://www.statsci.org/data/general/cherry.txt", - "filename": "cherry", - "name": "Volume of Black Cherry Trees", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data is from a dew-retting experiment in Ballarat 1942-43, in which flax was laid out under various climactic conditions and for various periods. Retting involves softening the flax stems by soaking in water, thus enabling the separation of the linen fibres from the wooden material by a process called scrutching. The flax variety used was \"Liral Crown\". Two samples were taken from each trial and the ret loss, as a percentage, was calculated. The other three variables are the mean daily rainfall (in points), the retting period (in days) and the mean daily temperature (in degrees Farenheit).", - "download": "http://www.statsci.org/data/oz/retloss.txt", - "filename": "retloss", - "name": "Ret Loss in Flax", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A production plant cost-control engineer is responsible for cost reduction. One of the costly items in his plant is the amount of water used by the production facilities each month. He decided to investigate water usage by collecting seventeen observations on his plant's water usage and other variables. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTemperature\n\nAverage monthly temperate (F)\n\nProduction\n\nAmount of production (M pounds)\n\nDays\n\nNumber of plant operating days in the month\n\nPersons\n\nNumber of persons on the monthly plant payroll\n\nWater\n\nMonthly water usage (gallons)\n", - "download": "http://www.statsci.org/data/general/water.txt", - "filename": "water_", - "name": "Water Usage of Production Plant", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Ryan et al (1994) describe the data as follows: \nIn autumn, small winged fruit called samara fall off maple trees, spinning as they go. A forest scientist studied the relationship between how fast they fell and their \"disk loading\" (a quantity based on their size and weight). The samara disk loading is related to the aerodynamics of helicopters. \nThe data give the loadings and fall velocities for fruit from three trees. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTree\n\n1 to 3\n\nLoading\n\nDisk loading\n\nVelocity\n\nFall velocity\n", - "download": "http://www.statsci.org/data/general/samara.txt", - "filename": "samara", - "name": "Fall Velocities for Samara Fruit", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The yield of pasture regrowth was measured together with the number of days since last grazing. The measurements were done on different experimental units so it is reasonable to assume the errors independent. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDays\n\nDays since last grazing\n\nYield\n\nYield of pasture\n", - "download": "http://www.statsci.org/data/general/regrowth.txt", - "filename": "regrowth", - "name": "Pasture Regrowth after Grazing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Herbicide bioassay is concerned with the reduction in plant growth as a function of the herbicide dose applied. This is a interest when developing new herbicides, assessing environmental effects on non-target species or estimating the residual herbicides in a treated soil before planting a new, herbicide suspectible crop. A typical experiment would comprise a series of doses ranging from ineffective to severely damaging to establish a dose-response relationship. In this experiment the callus area of a tissue culture of Brassica napus was measured corresponding to different doses of a sulfonylurea herbicide, metsulfuron methyl. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nChlorsulfuron\n\nConcentration of herbicide in nmol/L\n\nCallus\n\nLogarithm of callus area\n", - "download": "http://www.statsci.org/data/general/brassica.txt", - "filename": "brassica", - "name": "Response of Brassica napus to Chlorsulfuron", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data concerns the underground root system of eight separate apple trees. Three different root stocks are considered (Mark, MM106 and M26) and two plant spacing (4x2 meters and 5x3 meters). 
For each plant, soil core sampling units taken have been classified as belonging to an inner or outer zone. The response variable is the density of fine roots, also called the root length density, which can have zeros as well as continuous positive values. There are 511 observations, of which 193 or 38% have a zero response. \nThe design is not a full factorial design: plants 1 and 2 are tested only with the Mark root stock and at a spacing of 5x3; plants 3 and 4 are tested only with Mark root stock at a spacing of 4x2; plants 5 and 6 are tested only with root stock MM106 at a spacing of 5x3; and plants 7 and 8 are tested only with M26 root stock at a spacing of 4x2. The Mark root stock is tested at both plant spacings but the MM106 only at 5x3 and M26 only at 4x2. So there are four unique treatment combinations: Mark stock at 5x3 and 4x2, MM106 at 5x3, and M26 at 4x2. \nIt is of interest to (1) compare effects of spacing within Mark rootstock, (2) compare root stocks within same spacing and (3) to look for any difference in RLD between inner and out zones. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPlant \n\n1 to 8\n\nStock\n\nRoot stock: Mark, MM106 or M26\n\nSpacing\n\nPlant spacing: 5x3 or 4x2 meters\n\nZone\n\nZone relative to the plant the soil core is taken from: Inner or Outer\n\nRLD\n\nRoot length density in cm/cm3\n", - "download": "http://www.statsci.org/data/oz/fineroot.txt", - "filename": "fineroot", - "name": "Root Length Density of Apple Trees", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A biology student studied the effect of10 different fertilizers on the growth of mung bean sprouts. She sprouts 12 beans in each of 10 different petri dishes, and adds the same amount of fertilizer to each dish. 
After one week she measures the heights of the 120 sprouts in millimeters. ", - "download": "https://dasl.datadescription.com/download/data/3203", - "filename": "Fertilizers", - "name": "Fertilizers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1936 Sir Ronald Fisher presented data on irises as the example in a famous statistics paper. Ever since, “Fisher’s Iris data” have been a feature of statistics texts. Fisher presents 4 measurements of Iris flowers of three species. Can we differentiate the species? If so, how best to do that?", - "download": "https://dasl.datadescription.com/download/data/3206", - "filename": "Fisher-Irises", - "name": "Fisher’s Irises", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Hopkins Memorial Forest is a 2500-acre reserve in Massachusetts, New York, and Vermont managed by the Williams College Center for Environmental Studies (CES). As part of its mission, the CES monitors forest resources and conditions over the long term. 
", - "download": "https://dasl.datadescription.com/download/data/3271", - "filename": "Hopkins-Forest", - "name": "Hopkins Forest", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "One can determine how old a tree is by counting its rings, but that requires either cutting the tree down or extracting a sample from the tree’s core. Can we estimate the tree’s age simply from its diameter?A forester measured 27 trees of the same species that had been cut down, and counted the rings to determine the ages of the trees. ", - "download": "https://dasl.datadescription.com/download/data/3277", - "filename": "old-tree", - "name": "How old is that Tree", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "As the number of oranges on a tree increases, the fruit tends to get smaller. The dataset gives numbers of oranges/tree and average weight/orange (in pounds).", - "download": "https://dasl.datadescription.com/download/data/3385", - "filename": "Oranges", - "name": "Oranges", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "An experiment on mung beans was performed\nto investigate the environmental effects of salinity and\nwater temperature on sprouting. 
Forty beans were randomly\nallocated to each of 36 petri dishes that were subject\nto one of four levels of Salinity (0, 4, 8, and 12 ppm)\nand one of three Temperatures (32°, 34°, or 36° C). After\n48 hours, the biomass of the sprouts in gm was measured. The percent of beans germinating is also recorded.", - "download": "https://dasl.datadescription.com/download/data/3458", - "filename": "Sprouts", - "name": "Sprouts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tree growth", - "download": "https://dasl.datadescription.com/download/data/3497", - "filename": "Tree-growth", - "name": "Tree growth", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Vineyards", - "download": "https://dasl.datadescription.com/download/data/3513", - "filename": "Vineyards", - "name": "Vineyards", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "John O. Rawlings and Susan E. Spruill\n\nThe data set ch5.dat contains the following variables:\n\n1. site: coded 1-6 corresponding to the location code used in Table 1.\n2. block: block within site coded 1, 2, ... within sites for the RCB designs;\n block=1 for all observations for the CRD designs, sites 5 and 6.\n3. rep: replication within site coded as missing in sites 1-4;\n coded as 1, 2, ... 
for replicates in the CRD design.\n4. ozone: target ozone treatment, coded 0.0=charcoal filtered air, \n 1.0=nonfiltered air, \"x.x\"=target level of ozone as multiple of \n ambient ozone level.\n5. rain: acidic rain treatment, coded as pH of rain solution.\n6. fam: genetic family, coded as 1, 2, ... within sites.\n7. ppmhrs: cumulative ozone exposure (ppm-h) during the two years of\n the trials.\n8. vwpH: cumulative exposure to acidic rain computed as vwpH \n = -log(sum(volume*hydrogen ion concentration)).\n9. biomass: total above ground biomass (g) after two growing seasons.\n10. diam: increment of diameter growth (mm) during the two growing seasons.\n11. DMA: whole-plot component of the covariate initial diameter (mm)\n expressed as the deviation of the whole-plot mean from the overall\n site mean.\n12. DMB: sub-plot component of the covariate initial diameter (mm)\n expressed as the deviation of the subplot mean from the whole-plot mean.\n13. D2HA: whole-plot component of the covariate initial volume, \n approximated as diameter squared times height, and expressed as\n the deviation of the whole-plot mean from the overall site mean.\n14. D2HB: sub-plot component of the covariate initial volume and\n expressed as the deviation of the subplot mean from the whole-plot mean.\n15. DMOT: depth to mottling (cm) of the clay soil; one measurement\n per whole-plot. 
\n\nMissing data are coded with '.'", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch5.dat", - "filename": "Pine-Seedling", - "name": "Estimating Pine Seedling Response to Ozone and Acid Rain", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch8.dat contains the following variables:\n\nPop - population code, 1034 or 1040\nADH - 1 (cepa), 2 (het) or 3 (fist)\nIDH - 1 (cepa), 2 (het) or 3 (fist)\nPGI - 1 (cepa), 2 (het) or 3 (fist)\nfreq - frequency", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch8.dat", - "filename": "Hybrid-Onions", - "name": "\nMixture Fraction and Linkage Analyses for Hybrid Onions", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Plants" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data were collected from a mine in Cobar, NSW, Australia. At each of 38 sampling points, several measurements were taken, one of which is the 'true-width' of an ore-bearing rock layer. Also given are the co-ordinates t1 and t2 of of the data sites. 
Green and Silverman (1994) use this data set to illustrate thin-plate splines for fitting a smooth surface.", - "download": "http://www.statsci.org/data/oz/ore.txt", - "filename": "ore", - "name": "Wide of Ore-Bearing Layer", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The proportions of sand, silt and clay in soil samples are given for 8 contiguous sites. The sites extended over the crest and flank of a low rise in a valley underlain by marl near Albudeite in the province of Murcia, Spain. The sites were small areas of ground surface of uniform shape internally and delimited by relative discontinuities externally. Soil samples were obtained for each site at 11 random points within a 10m by 10m area centred on the mid-point of the site. All samples were taken from the same depth. The data give the sand, silt and clay content of each sample, expressed as a percentage of the total sand, silt and clay content. \nThe purpose of the study by Wright and Wilson (1979) was to determine whether the sites could be differentiated on the basis of their soil composition. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSite \n\n1-8\n\nSand \n\nPercent sand\n\nSilt\n\nPercent silt\n\nClay\n\nPercent clay\n", - "download": "http://www.statsci.org/data/general/murcia.txt", - "filename": "murcia", - "name": "Composition of Soil from Murcia Province, Spain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Meteor Crater in Arizona was the first recognized impact crater and was identified as such only in the 1920s. With the help of satellite images, more and more craters have been identified; now more than 180 are known. These, of course, are only a small sample of all the impacts the earth has experienced: Only 29% of earth’s surface is land, and many craters have been covered or eroded away. Astronomers have recog-nized a roughly 35 million-year cycle in the frequency of cratering, although the cause of this cycle is not fully understood.\nThe data hold information about craters. craters from the most recent 35Ma (million years) may be the more reliable data, and are suitable for analyses relating age and diameter.", - "download": "https://dasl.datadescription.com/download/data/3142", - "filename": "Craters", - "name": "Craters", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch6.dat contains the following variables. 
\n\n\nSTRATA : National Marine Fisheries Service (NMFS) 4 digit strata\n designator in which the sample was taken \n \nSAMPLE : Sample number per year ranging from 1 to approximately 450\n\nLAT : Location in terms of latitude of each sample in the Atlantic Ocean \n\nLONG : Location in terms of longitude of each sample in the Atlantic Ocean\n \nTCATCH : Total number of scallops caught at the ith sample location\n\nPREREC : Number of scallops whose shell length is smaller than 70 millimeters\n \nRECRUITS : Number of scallops whose shell length is 70 millimeters or larger", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch6.dat", - "filename": "Scallop-Abundance", - "name": "Geostatistical Estimates of Scallop Abundance", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Geology" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3094", - "filename": "Carbon-footprint", - "name": "Carbon footprint", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3095", - "filename": "Carbon-footprint-2015", - "name": "Carbon footprint 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": 
"https://dasl.datadescription.com/download/data/3240", - "filename": "Gemstones", - "name": "Gemstones", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "It is a common belief that Yellowstone’s most famous geyser erupts once an hour at very predictable intervals. But, in fact, the intervals between eruptions can vary greatly. Can we predict the interval from, for example, the duration of the previous eruption? Are there other patterns in the data worth noting? ", - "download": "https://dasl.datadescription.com/download/data/3380", - "filename": "Old-Faithful", - "name": "Old Faithful", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Ozone levels (in parts per billion, ppb) were recorded at sites in New Jersey monthly between 1926 and 1971. Here are boxplots of the data for each month (over the 46 years), lined up in order (January = 1): ", - "download": "https://dasl.datadescription.com/download/data/3386", - "filename": "Ozone", - "name": "Ozone", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The National Interagency Fire Center reports statistics about wildfires. They report data from 1960, but the years 1960-1984 are so different from subsequent years that they can’t be analyzed together. These data are for 1985-2015. 
Is there a pattern over time? What is the relationship between the number of fires and the acres affected? Are fires getting larger or smaller on average?", - "download": "https://dasl.datadescription.com/download/data/3523", - "filename": "Wildfires-2015", - "name": "Wildfires 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Geographical coordinates of the shoreline of the 17 islands that form the\nKodiak Island group. The two columns are\n\n 1. latitude\n 2. longitude\n\nmeasured in degrees and fractions of a degree. Each of the 17 groups of\ncoordinates is terminated by a pair of \"NA\"s, and the end of each group loops\nback to the beginning. For drawing maps, bear in mind that longitude is\nmeasured East to West, which is right to left. This suggests plotting\nnegative longitude instead of longitude. 
Also, to draw maps that \"look right\"\nto an Alaskan, you must take into account that in this part of the world the\naspect ratio of one degree latitude (y-axis) to one degree longitude (x-axis)\nis 1:1.8 (in terms of actual ground distance).", - "download": "http://lib.stat.cmu.edu/crab/kodiak", - "filename": "kodiak", - "name": "Geographical coordinates of the shoreline of Kodiak Island group", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Other" - } - ] - }, - { - "category_name": "Statistics", - "subcategories": [ - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the year of founding for 40 New Zealand wineries.", - "download": "http://www.statsci.org/data/oz/wineries.txt", - "filename": "wineries", - "name": "Founding Dates of NZ Wineries", - "number_format": 31, - "remove_quotes": true, - "separator": "auto", - "simplify_whitespaces": true, - "skip_empty_parts": false, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. 
", - "download": "http://www.statsci.org/data/general/auction.txt", - "filename": "auction", - "name": "Selling Price of Antique Grandfather Clocks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The two columns of the data are the prices and year purchased for 124 Mazda cars, as taken from the classified section of the Melbourne Age during the course of 1991. Hence the age of the car at the time can be calculated and used to model car price. ", - "download": "http://www.statsci.org/data/oz/mazdas.txt", - "filename": "mazdas", - "name": "Age and Price of Mazda Cars", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data show the capital value and annual rental value of 96 domestic properties in Auckland in 1991. The aim was to explore their relationship in the hope of being able to predict capital value from rental value, thus the latter is the explanatory variable in this case.", - "download": "http://www.statsci.org/data/oz/rentcap.txt", - "filename": "rentcap", - "name": "Capital and Rental Values of Auckland Properties", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of the clock (years)\n\nBidders\n\nNumber of individuals participating in the bidding\n\nPrice\n\nSelling price (pounds sterling)\n", - "download": "http://www.statsci.org/data/general/auction.txt", - "filename": "auction_", - "name": "Selling Price of Antique Grandfather Clocks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data were collected to study the variation in rent paid in 1977 for agricultural land planted to alfalfa in Minnesota. The data include: \n\n\nVariable\n\nDescription\n\n\nRent\n \naverage rent per acre planted to alfalfa\nAllRent\n \naverage rent paid for all tillable land\nCows\n \ndensity of diary cows (number per square mile)\nPasture\n \nproportion of farmland used as pasture\nLiming\n \nYes if liming is required to grow alfalfa; No otherwise\n", - "download": "http://www.statsci.org/data/general/landrent.txt", - "filename": "landrent", - "name": "Rent for Land Planted to Alfalfa", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Monthly observations on various share price and financial variables were recorded from October 1991 to August 1997. Data collected by Francine Pritchard and Glen Dixon for their MS305 data analysis project in 1997. 
\n\n\nVariable\n\nDescription\n\n\nBank\n\nShare Price Index\nAllOrds\n\n\nDevelop\n\n\nMining\n\n\nGold\n\n\nBuild\n\n\nProp\n\n\nIndust\n\n\nEnergy\n\n\nFinance\n\n\nResource\n\n\nTransport\n\n\nRetail\n\n\nUnemploy\n\nUnemployment Rate\nCPI\n\nConsumer Price Index\nBankBill\n\n90 Day Bank Bill Interest Rate\n", - "download": "http://www.statsci.org/data/oz/bankbill.txt", - "filename": "bankbill", - "name": "90 Day Bank Bills", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The following data was collected in the 1960s at a house in south-east England. The weekly gas consumption (in 1000 cubic feet) and the average outside temperature (in degrees Celsius) was recorded for 26 weeks before and 30 weeks after cavity-wall insulation had been installed. The house thermostat was set at 20°C throughout. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nInsulate\n\nBefore or After\n\nTemp\n\nAverage outside temperature (C)\n\nGas\n\nGas consumption (1000's of cubic feet)\n", - "download": "http://www.statsci.org/data/general/insulgas.txt", - "filename": "insulgas", - "name": "House Insulation and Gas Consumption", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Age specific term life premium rates for a sum insured of $50,000 are given in the table. The first column is the age of insured, the next two columns are the rates for male smokers and non-smokers, and the last two columns are the rates for female smokers and non-smokers. 
The four separate sets of points may be plotted and cubic spline regression used to fit them.", - "download": "http://www.statsci.org/data/oz/insure.txt", - "filename": "insure", - "name": "Insurance Premiums", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the Canadian automobile insurance experience for policy years 1956 and 1957 as of June 30, 1959. The data includes virtually every insurance company operating in Canada and was collated by the Statistical Agency (Canadian Underwriters' Association - Statistical Department) acting under instructions from the Superintendent of Insurance. The data given here is for private passenger automobile liability for non-farmers for all of Canada excluding Saskatchewan. \nThe variable Merit measures the number of years since the last claim on the policy. The variable Class is a collation of age, sex, use and marital status. The variables Insured and Premium are two measures of the risk exposure of the insurance companies. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMerit\n\nMerit Rating:\n3 - licensed and accident free 3 or more years\n2 - licensed and accident free 2 years\n1 - licensed and accident free 1 year\n0 - all others\n\nClass\n\n1 - pleasure, no male operator under 25\n2 - pleasure, non-principal male operator under 25\n3 - business use\n4 - unmarried owner or principal operator under 25\n5 - married owner or principal operator under 25\n\nInsured\n\nEarned car years\n\nPremium\n\nEarned premium in 1000's\n(adjusted to what the premium would have been had all cars been written at 01 rates)\n\nClaims\n\nNumber of claims\n\nCost\n\nTotal cost of the claim in 1000's of dollars\n", - "download": "http://www.statsci.org/data/general/carinsca.txt", - "filename": "carinsca", - "name": "Canadian Automobile Insurance Claims for 1957-1958", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give details of third party motor insurance claims in Sweden for the year 1977. \n\"In Sweden all motor insurance companies apply identical risk arguments to classify customers, and thus their portfolios and their claims statistics can be combined. The data were compiled by a Swedish Committee on the Analysis of Risk Premium in Motor Insurance. 
The Committee was asked to look into the problem of analyzing the real influence on claims of the risk arguments and to compare this structure with the actual tariff.\" \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nKilometres \n\nKilometres travelled per year\n1: < 1000\n2: 1000-15000\n3: 15000-20000\n4: 20000-25000\n5: > 25000\n\nZone \n\nGeographical zone\n1: Stockholm, Göteborg, Malmö with surroundings\n2: Other large cities with surroundings\n3: Smaller cities with surroundings in southern Sweden\n4: Rural areas in southern Sweden\n5: Smaller cities with surroundings in northern Sweden\n6: Rural areas in northern Sweden\n7: Gotland \n\nBonus\n\nNo claims bonus. Equal to the number of years, plus one, since last claim\n\nMake\n\n1-8 represent eight different common car models. All other models are combined in class 9\n\nInsured\n\nNumber of insured in policy-years\n\nClaims\n\nNumber of claims\n\nPayment\n\nTotal value of payments in Skr\n\n\n\n", - "download": "http://www.statsci.org/data/general/motorins.txt", - "filename": "motorins", - "name": "Third Party Motor Insurance in Sweden", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the average claims for damage to the owner's car for privately owned and comprehensively insured vehicles in Britain in 1975. Averages are given in pounds sterling adjusted for inflation. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nOwnerAge\n\nPolicy-holder's age in years, categorized into 8 levels\n\nModel\n\nType of car, in 4 groups\n\nCarAge\n\nVehicle age in years, categorized into 4 levels\n\nNClaims\n\nNumber of claims\n\nAveCost\n\nAverage cost of each claim in pounds\n", - "download": "http://www.statsci.org/data/general/carinsuk.txt", - "filename": "carinsuk", - "name": "British Car Insurance Claims for 1975", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Monthly data relating to hotels, motels and guesthouses in Victoria, from January 1980 to June 1995. First column: total number of room nights occupied; Second column: total takings from accommodation. ", - "download": "http://www.statsci.org/data/oz/motel.txt", - "filename": "motel", - "name": "Hotels, Motels and Guesthouses in Victoria", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data list the CPI (Consumer Price Index) figures for five countries from 1985 to 1994. The countries are Australia, Canada, New Zealand, the United Kingdom and the United States. 
Each index is based on the December Quarter 1993 (1000).", - "download": "http://www.statsci.org/data/oz/cpifive.txt", - "filename": "cpifive", - "name": "CPI for Five Countries", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly CPI indices for Brisbane for food, clothing, housing etc, from June 1972 to September 1997. \nThe groups are: Food, Clothing, Housing, Household equipment and operation, Transportation, Tobacco and Alcohol, Health and personal care, Recreation and education, and All groups. The CPI are standardized so that the year 1989-90 is 100.0. ", - "download": "http://www.statsci.org/data/oz/cpibris.txt", - "filename": "cpibris", - "name": "Brisbane Consumer Price Indices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly price indices for established homes in Australian capital cities, from June 1986 to June 1997. The price indices are standardized so that the year 1989-1990 is 100.0 for each city. 
", - "download": "http://www.statsci.org/data/oz/houses.txt", - "filename": "houses", - "name": "House Price Indexes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "To shorten the time it takes him to make his favorite pizza, a student designed an experiment to test the effect of sugar and milk on the activation times for baking yeast. Specifically, he tested four different recipes and measured how many seconds it took for the same amount of dough to rise to the […] ", - "download": "https://dasl.datadescription.com/download/data/3042", - "filename": "activating-baking-yeast", - "name": "Activating baking yeast", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. 
Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […] ", - "download": "https://dasl.datadescription.com/download/data/3046", - "filename": "AIG-daily", - "name": "AIG daily", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […]", - "download": "https://dasl.datadescription.com/download/data/3047", - "filename": "AIG-monthly", - "name": "AIG monthly", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A sample of model 2011 cars from an online information service collected to see how fuel efficiency (as highway mpg) relates to the cost (MSRP) ", - "download": "https://dasl.datadescription.com/download/data/3050", - "filename": "All-the-efficiency", - "name": "All the efficiency", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The price of delicious apples and regular gas are components of the Consumer Price Index. 
The data give those prices monthly for the year 2006 ", - "download": "https://dasl.datadescription.com/download/data/3055", - "filename": "Apples-and-gas", - "name": "Apples and gas", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "You have decided to invest in a bond fund and plan to limit your choice of funds to Morningstar “medalist” funds. But now you must choose between a taxable fund and a municipal bond fund that is at least partially tax-free. Which is better? Here are the % returns for the three-year period leading up", - "download": "https://dasl.datadescription.com/download/data/3080", - "filename": "Bond-funds", - "name": "Bond funds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Number of sales people working in a bookstore and sales (in $1000) that day. These are realistic but invented data. ", - "download": "https://dasl.datadescription.com/download/data/3081", - "filename": "Bookstore-sales", - "name": "Bookstore sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 2015, the website NewGeography.com listed its ranking of the best cities for job growth in the United States. 
Nonfarm employment is also provided", - "download": "https://dasl.datadescription.com/download/data/3082", - "filename": "Boomtowns-2015", - "name": "Boomtowns 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Home prices in two neighborhoods near San Francisco. Palo Alto is an older neighborhood and Foster City, a newer one. How do prices compare?", - "download": "https://dasl.datadescription.com/download/data/3104", - "filename": "CA-House-Prices", - "name": "CA House Prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3097", - "filename": "Car-discounts", - "name": "Car discounts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3098", - "filename": "Car-origins", - "name": "Car origins", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The S&P/Case-Shiller Home Price Indices track changes in the value of residential real estate nationally and in 20 metropolitan 
regions. (Some of these indices are actually traded on the Chicago Mercantile Exchange.) The data set Case-Shiller by City gives the monthly index values for each of the 20 cities tracked by the Case-Shiller index and […] ", - "download": "https://dasl.datadescription.com/download/data/3102", - "filename": "Case-Shiller-by-city", - "name": "Case-Shiller by city", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Beginning in 2017, public companies will be required to disclose the ratio of CEO pay to median worker pay. The Glassdoor Economic Research Blog has published the data for 2014. The data includes CEO identities, companies, CEO compensation, median worker compensation (compiled by Glassdoor), and the ratio of CEO to worker compensation.", - "download": "https://dasl.datadescription.com/download/data/3105", - "filename": "CEO-Compensation-2014", - "name": "CEO Compensation 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3106", - "filename": "CEO-Salary-2012", - "name": "CEO Salary 2012", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Coffee is the world’s second largest\nlegal export commodity (after oil) and is the second largest\nsource of foreign exchange for 
developing nations. The\nUnited States consumes about one-fifth of the world’s coffee.\nThe International Coffee Organization (ICO) computes\na coffee price index using Colombian, Brazilian, and\na mixture of other coffee data. Data are provided for the\nmonthly average ICO price index (in $US) from Jan 2009 to December 2017.", - "download": "https://dasl.datadescription.com/download/data/3119", - "filename": "Coffee-prices-2017", - "name": "Coffee-prices-2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The cost of a variety of common items in 576 cities around the world in $, adjusted so that New York, U.S.A. is 100.", - "download": "https://dasl.datadescription.com/download/data/3120", - "filename": "COLall-2016", - "name": "COLall 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Facts about companies selected from the Forbes 500 list for 1986. This is a 1/10 systematic sample from the alphabetical list of companies. 
The Forbes 500 includes all companies in the top 500 on any of the criteria, and thus has almost 800 companies in the list.", - "download": "https://dasl.datadescription.com/download/data/3125", - "filename": "Companies", - "name": "Companies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Facts about companies selected from the Forbes 500 list for 2000", - "download": "https://dasl.datadescription.com/download/data/3595", - "filename": "Companies-Quickstart", - "name": "Companies Quickstart", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3129", - "filename": "Consumer-spending", - "name": "Consumer spending", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3130", - "filename": "Consumer-spending-post-holiday", - "name": "Consumer spending post holiday", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Numbeo.com lists the cost of living (COL) for 576 cities around the world. 
They report the typical cost of a number of staples. The cost of living is made up of many components. These data report a variety of everyday costs. How are they related? Can an overall cost of living be constructed from them?", - "download": "https://dasl.datadescription.com/download/data/3132", - "filename": "Cost-of-living-2016", - "name": "Cost of living 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3135", - "filename": "Cost-of-Living-2017", - "name": "Cost of Living 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Cost of Living Index (Excl. Rent) is a relative indicator of consumer goods prices, including groceries, restaurants, transportation and utilities. Cost of Living Index doesn’t include accommodation expenses such as rent or mortgage. If a city has a Cost of Living Index of 120, it means Numbeo estimates it is 20% more expensive than New York (excluding rent).\nRent Index is an estimation of prices of renting apartments in the city compared to New York City. If Rent index is 80, Numbeo estimates that price of rents in that city is on an average 20% less than the price in New York.\nGroceries Index is an estimation of grocery prices in the city compared to New York City. 
To calculate this section, Numbeo uses weights of items in the “Markets” section for each city.\nRestaurants Index is a comparison of prices of meals and drinks in restaurants and bars compared to NYC.\nCost of Living Plus Rent Index is an estimation of consumer goods prices including rent compared to New York City.\nLocal Purchasing Power shows relative purchasing power in buying goods and services in a given city for the average wage in that city. If domestic purchasing power is 40, this means that the inhabitants of that city with the average salary can afford to buy on average 60% fewer goods and services than New York City residents with an average salary.", - "download": "https://dasl.datadescription.com/download/data/3136", - "filename": "Cost-of-living-2018", - "name": "Cost of living 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Consumer Price Index (CPI) summarizes the cost of a representative market basket\nof goods that includes groceries, restaurants, transportation, utilities, and medical\ncare. Global companies often use the CPI to determine living allowances and salaries\nfor employees. Inflation is often measured by how much the CPI changes from year to\nyear. Relative CPIs can be found for different cities. We have data giving CPI components\nrelative to New York City. 
For New York City, each index is 100(%).", - "download": "https://dasl.datadescription.com/download/data/3139", - "filename": "CPI-Worldwide-2016", - "name": "CPI Worldwide 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A credit card company wants to see how much customers in a particular segment of\ntheir market use their credit card. They have provided data on the amount\nspent by 500 selected customers during a 3-month period and have asked you to\nsummarize the expenditures. (Data are realistic, but disguised for confidentiality.)", - "download": "https://dasl.datadescription.com/download/data/3146", - "filename": "Credit-card-charges", - "name": "Credit card charges", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Peninsula Creameries sells both cottage cheese and ice cream. The CEO recently noticed that in months when the company sells more cottage cheese, it seems to sell more ice cream as well.", - "download": "https://dasl.datadescription.com/download/data/3152", - "filename": "Dairy-sales", - "name": "Dairy sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data on raw diamonds from the internet. Price of a diamond depends on its Carat weight, color, clarity, and cut. 
The data are for 2690 diamonds of a variety of weights, colors, clarity, and cut. What predicts the price? Do the variables need to be reexpressed?", - "download": "https://dasl.datadescription.com/download/data/3161", - "filename": "Diamonds_", - "name": "Diamonds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Dow Jones stock index measures the performance of the stocks of America’s largest companies. A regression of the Dow prices on years 1972–2015 appears to be successful, but the residuals raise some questions.", - "download": "https://dasl.datadescription.com/download/data/3176", - "filename": "Dow-Jones-2015", - "name": "Dow Jones 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly e-commerce retail sales (in millions of dollars) in the United States from 1999 to 2008 ", - "download": "https://dasl.datadescription.com/download/data/3180", - "filename": "E-commerce", - "name": "E-commerce", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "When implementing a packaged\nEnterprise Resource Planning (ERP) system, many companies\nreport that the module they first install is Financial\nAccounting. 
Among the measures used to gauge the\neffectiveness of their ERP system implementation is acceleration\nof the financial close process. The data hold a sample of\n8 companies that report their average time (in weeks) to\nfinancial close before and after the implementation of their\nERP system.", - "download": "https://dasl.datadescription.com/download/data/3191", - "filename": "ERP-Effectiveness", - "name": "ERP Effectiveness", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Sales (in $) for one week were collected for 18 stores in a food store chain in the northeastern United States. The stores and the towns they are located in vary in size.", - "download": "https://dasl.datadescription.com/download/data/3213", - "filename": "Food-sales", - "name": "Food sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The U.S. government provides fuel economy (in miles per gallon) and other information about late model cars sold in the US. How would you model the relationship between fuel economy and engine displacement (in liters)? Are there any cars that don’t fit the model? Can you explain why? 
", - "download": "https://dasl.datadescription.com/download/data/3225", - "filename": "Fueleconomy-2016", - "name": "Fuel economy 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Weekly gas prices for regular gas in the United States as reported by the U.S. Energy Information Administration for 2009 through August 2016 ", - "download": "https://dasl.datadescription.com/download/data/3232", - "filename": "Gas-prices-2016", - "name": "Gas prices 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3233", - "filename": "Gas-Prices-2017", - "name": "Gas Prices 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Monthly gas prices for all grades and all formulations ($/gallon) in the United States as reported by the U.S. Energy Information Administration for 1993 through August 2018. Prices are available at the cite for all weeks. 
Data here are for the final week of each month.", - "download": "https://dasl.datadescription.com/download/data/3234", - "filename": "Gas-prices-2018", - "name": "Gas prices 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Many drivers of cars that can run on regular gas actually buy premium in the belief that they will get better gas mileage. To test that belief, we use 10 cars from a company fleet in which all the cars run on regular gas. Each car is filled first with either regular or premium gasoline, decided by a coin toss, and the mileage for that tankful is recorded. Then the mileage is recorded again for the same cars for a tankful of the other kind of gaso-line. We don’t let the drivers know about this experiment.", - "download": "https://dasl.datadescription.com/download/data/3235", - "filename": "Gas-prices-monthly", - "name": "Gas prices monthly", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3238", - "filename": "GDP-state", - "name": "GDP by state", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3239", - "filename": "GDP-growth-2017", - "name": "GDP growth 2017", - "number_format": 31, - 
"remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily opening and closing stock prices (adjusted for splits and dividends) for Google, Inc. from Aug 19, 2004 through June 21, 2013 ", - "download": "https://dasl.datadescription.com/download/data/3247", - "filename": "Google-stock-prices", - "name": "Google stock prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A graphite manufacturer makes long\nrolls of flexible graphite to be used to seal components in\ncombustion engines. The specifications state that the mean\nstrength should be 21.2 ounces per square yard with a\nstandard deviation of 0.29. Further specifications state that\nno roll should have strength less than 20.2 or more than\n22.2 ounces per square yard. If there is a defect in terms\nof the strength of the graphite rolls, the seal will not hold.\nAfter the roll is created, a beta scanner takes readings of\nthe basis weight in ounces per square yard. The data is\nseparated into 10 lanes with 20 scans in each lane. A sample\nconsists of one roll from each lane. 
The results from 20\nsamples follow are in the data.", - "download": "https://dasl.datadescription.com/download/data/3250", - "filename": "Graphite-production", - "name": "Graphite production", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "WinCo Foods, a large discount grocery\nretailer in the western United States, promotes itself as the lowest priced grocery retailer. In newspaper ads WinCo Foods published a price comparison for products between WinCo and several competing grocery retailers. One of the retailers compared against WinCo was Walmart, also known as a low price competitor. WinCo selected a variety of products, listed the price of the product charges at each retailer, and showed the sales receipt to prove the prices at WinCo were the lowest in the area. 
A sample of the products and their price comparison at both WinCo and Walmart are given.", - "download": "https://dasl.datadescription.com/download/data/3251", - "filename": "Grocery-prices", - "name": "Grocery prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Health expenditures", - "download": "https://dasl.datadescription.com/download/data/3260", - "filename": "Health-expenditures", - "name": "Health expenditures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. 
", - "download": "https://dasl.datadescription.com/download/data/3266", - "filename": "Historica-Oil-Prices-2016", - "name": "Historical Oil Prices 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Holiday shopping", - "download": "https://dasl.datadescription.com/download/data/3267", - "filename": "Holiday-shopping", - "name": "Holiday shopping", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Holiday spending", - "download": "https://dasl.datadescription.com/download/data/3268", - "filename": "Holiday-spending", - "name": "Holiday spending", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Home depot sales", - "download": "https://dasl.datadescription.com/download/data/3269", - "filename": "Home-depot-sales", - "name": "Home depot sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Home Price Index 2017", - "download": "https://dasl.datadescription.com/download/data/3270", - "filename": "Home-Price-Index-2017", - "name": "Home Price Index 2017", - "number_format": 31, - "remove_quotes": true, - 
"separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "House prices and properties in New York. What properties of a house can predict its price? Can we use such a model to identify houses that are extraordinarily expensive or inexpensive? ", - "download": "https://dasl.datadescription.com/download/data/3275", - "filename": "Housing-prices", - "name": "Housing prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "House prices and properties in New York. What properties of a house can predict its price? Can we use such a model to identify houses that are extraordinarily expensive or inexpensive? 
", - "download": "https://dasl.datadescription.com/download/data/3276", - "filename": "Housing-prices-GE19", - "name": "Housing prices GE19", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "How are housing costs related to median family income?", - "download": "https://dasl.datadescription.com/download/data/3283", - "filename": "Income-housing", - "name": "Income and housing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Income vs Hours 2013", - "download": "https://dasl.datadescription.com/download/data/3286", - "filename": "Income-vs-Hours-2013", - "name": "Income vs Hours 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The U.S. Consumer Price Index and year, every 5 years since 1916. These are the values for January of each year. What is the trend? Can we model it with a linear regression? 
", - "download": "https://dasl.datadescription.com/download/data/3291", - "filename": "Inflation-2016", - "name": "Inflation 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Average annual interest rates (banks prime lending) in the United States from 1966 through 2009 ", - "download": "https://dasl.datadescription.com/download/data/3296", - "filename": "Interest-rates-2009", - "name": "Interest rates 2009", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "he amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? Consider a rotating plot of interest rate, mortgage total, and year. ", - "download": "https://dasl.datadescription.com/download/data/3297", - "filename": "Interest-mortgage", - "name": "Interest rates and mortgages", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? Consider a rotating plot of interest rate, mortgage total, and year. 
", - "download": "https://dasl.datadescription.com/download/data/3298", - "filename": "Interest-mortgage-2015", - "name": "Interest rates and mortgages 2015", + "description": "To shorten the time it takes him to make his favorite pizza, a student designed an experiment to test the effect of sugar and milk on the activation times for baking yeast. Specifically, he tested four different recipes and measured how many seconds it took for the same amount of dough to rise to the […] ", + "download": "https://dasl.datadescription.com/download/data/3042", + "filename": "activating-baking-yeast", + "name": "Activating baking yeast", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This example is based on 1998 case study written by J. Hunt, E. Landry, and J. Rao as part of the Babson College case series. The data and setting used in this example are based on the actual case study, but the data have been modified and the conclusions are fictitious.", - "download": "https://dasl.datadescription.com/download/data/3308", - "filename": "Komtek-Technologies", - "name": "Komtek Technologies", + "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. 
Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […] ", + "download": "https://dasl.datadescription.com/download/data/3046", + "filename": "AIG-daily", + "name": "AIG daily", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Real estate agents want to set correctly\nthe price of a house that’s about to go on the real estate\nmarket. They must choose a price that strikes a balance\nbetween one that is so high that the house takes too long\nto sell and one that’s so low that not enough value will go\nto the homeowner. One appraisal method is the “Comparative\nMarket Analysis” approach by which the market\nvalue of a house is based on recent sales of similar homes\nin the neighborhood. Because no two houses are exactly\nthe same, appraisers have to adjust comparable homes for\nsuch features as extra square footage, bedrooms, fireplaces,\nupgrading, parking facilities, swimming pool, lot size, location,\nand so on. The appraised market values and the selling\nprices of 45 homes from the same region are given.", - "download": "https://dasl.datadescription.com/download/data/3328", - "filename": "Market-value", - "name": "Market value", + "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. 
Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […]", + "download": "https://dasl.datadescription.com/download/data/3047", + "filename": "AIG-monthly", + "name": "AIG monthly", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Marketing managers salaries", - "download": "https://dasl.datadescription.com/download/data/3327", - "filename": "Marketing-managers-salaries", - "name": "Marketing managers salaries", + "description": "A sample of model 2011 cars from an online information service collected to see how fuel efficiency (as highway mpg) relates to the cost (MSRP) ", + "download": "https://dasl.datadescription.com/download/data/3050", + "filename": "All-the-efficiency", + "name": "All the efficiency", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Quarterly median weekly earnings from the first quarter of 2003 through the first quarter of 2013 for men, 25 years of age or older, in the United States ", - "download": "https://dasl.datadescription.com/download/data/3336", - "filename": "Men-weekly-earnings-2013", - "name": "Men’s weekly earnings 2013", + "description": "The price of delicious apples and regular gas are components of the Consumer Price Index. 
The data give those prices monthly for the year 2006 ", + "download": "https://dasl.datadescription.com/download/data/3055", + "filename": "Apples-and-gas", + "name": "Apples and gas", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Movie budgets", - "download": "https://dasl.datadescription.com/download/data/3347", - "filename": "Movie-budgets", - "name": "Movie budgets", + "description": "You have decided to invest in a bond fund and plan to limit your choice of funds to Morningstar “medalist” funds. But now you must choose between a taxable fund and a municipal bond fund that is at least partially tax-free. Which is better? Here are the % returns for the three-year period leading up", + "download": "https://dasl.datadescription.com/download/data/3080", + "filename": "Bond-funds", + "name": "Bond funds", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Does money purchase a good movie? Is the US Gross revenue related to either the budge or the Rotten Tomatoes score? The dataset holds data on 609 recent releases that includes the USGross (in $M), the Budget ($M), the Run Time (minutes), and the score given by the critics on the Rotten Tomatoes website. ", - "download": "https://dasl.datadescription.com/download/data/3349", - "filename": "Movie-profits", - "name": "Movie profits", + "description": "Number of sales people working in a bookstore and sales (in $1000) that day. These are realistic but invented data. 
", + "download": "https://dasl.datadescription.com/download/data/3081", + "filename": "Bookstore-sales", + "name": "Bookstore sales", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Mutual fund flows", - "download": "https://dasl.datadescription.com/download/data/3354", - "filename": "Mutual-fund-flows", - "name": "Mutual fund flows", + "description": "In 2015, the website NewGeography.com listed its ranking of the best cities for job growth in the United States. Nonfarm employment is also provided", + "download": "https://dasl.datadescription.com/download/data/3082", + "filename": "Boomtowns-2015", + "name": "Boomtowns 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "On December 30, 2016, the Standard and Poor’s (S&P) 500 index hit an all-time high. During 2016, the S&P returned 12.25%. Here is a histogram of the 2016 net returns (total return – annual expenses) for Money Magazine’s 50 Best Mutual Funds and ETFs. The net returns are computed from the data given by Money Magazine.", - "download": "https://dasl.datadescription.com/download/data/3353", - "filename": "Mutual-funds-2016", - "name": "Mutual funds 2016", + "description": "Home prices in two neighborhoods near San Francisco. Palo Alto is an older neighborhood and Foster City, a newer one. 
How do prices compare?", + "download": "https://dasl.datadescription.com/download/data/3104", + "filename": "CA-House-Prices", + "name": "CA House Prices", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A study by the U.S. Small\nBusiness Administration used historical data to model the\nGDP per capita of 24 of the countries in the Organization\nfor Economic Cooperation and Development(OECD). The researchers hoped to show that more regulation leads to lower GDP/Capita. The multiple regression with all terms does have a significant P-value for Economic Regulation Index.\nHowever, Primary Education is not a significant predictor. If it is removed from the model, then OECD Regulation is no longer significant at .05. Was it added to the model just to judge the P-value of OECD regulation down to permit a publication that claimed an effect?\nCheck to see whether you think there is such an effect.", - "download": "https://dasl.datadescription.com/download/data/3373", - "filename": "OECD-economic-regulations", - "name": "OECD economic regulations", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3097", + "filename": "Car-discounts", + "name": "Car discounts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "OECD GDP", - "download": "https://dasl.datadescription.com/download/data/3374", - "filename": "OECD-GDP", - "name": "OECD GDP", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3098", + "filename": "Car-origins", + "name": "Car origins", "number_format": 31, 
"remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "OECD GDP Growth", - "download": "https://dasl.datadescription.com/download/data/3375", - "filename": "OECD-GDP-Growth", - "name": "OECD GDP Growth", + "description": "The S&P/Case-Shiller Home Price Indices track changes in the value of residential real estate nationally and in 20 metropolitan regions. (Some of these indices are actually traded on the Chicago Mercantile Exchange.) The data set Case-Shiller by City gives the monthly index values for each of the 20 cities tracked by the Case-Shiller index and […] ", + "download": "https://dasl.datadescription.com/download/data/3102", + "filename": "Case-Shiller-by-city", + "name": "Case-Shiller by city", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "OECD Unemployment", - "download": "https://dasl.datadescription.com/download/data/3376", - "filename": "OECD-Unemployment", - "name": "OECD Unemployment", + "description": "Beginning in 2017, public companies will be required to disclose the ratio of CEO pay to median worker pay. The Glassdoor Economic Research Blog has published the data for 2014. 
The data includes CEO identities, companies, CEO compensation, median worker compensation (compiled by Glassdoor), and the ratio of CEO to worker compensation.", + "download": "https://dasl.datadescription.com/download/data/3105", + "filename": "CEO-Compensation-2014", + "name": "CEO Compensation 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. The data include both the inflation-adjusted prices of a barrel of oil from 1968 to 2016 and two prediction models. ", - "download": "https://dasl.datadescription.com/download/data/3377", - "filename": "Oil-prices-2016", - "name": "Oil prices 2016", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3106", + "filename": "CEO-Salary-2012", + "name": "CEO Salary 2012", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Online Shopping", - "download": "https://dasl.datadescription.com/download/data/3384", - "filename": "Online-Shopping", - "name": "Online Shopping", + "description": "Coffee is the world’s second largest\nlegal export commodity (after oil) and is the second largest\nsource of foreign exchange for developing nations. The\nUnited States consumes about one-fifth of the world’s coffee.\nThe International Coffee Organization (ICO) computes\na coffee price index using Colombian, Brazilian, and\na mixture of other coffee data. 
Data are provided for the\nmonthly average ICO price index (in $US) from Jan 2009 to December 2017.", + "download": "https://dasl.datadescription.com/download/data/3119", + "filename": "Coffee-prices-2017", + "name": "Coffee-prices-2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Sales volume and price of a slice of plain pizza ($) in Baltimore, Dallas, Chicago, and Denver for 156 weeks. How are prices and sales volumes related? Are patterns the same across cities? ", - "download": "https://dasl.datadescription.com/download/data/3395", - "filename": "Pizza-prices", - "name": "Pizza prices", + "description": "The cost of a variety of common items in 576 cities around the world in $, adjusted so that New York, U.S.A. is 100.", + "download": "https://dasl.datadescription.com/download/data/3120", + "filename": "COLall-2016", + "name": "COLall 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Poverty and Region 2015", - "download": "https://dasl.datadescription.com/download/data/3403", - "filename": "Poverty-and-Region-2015", - "name": "Poverty and Region 2015", + "description": "Facts about companies selected from the Forbes 500 list for 1986. This is a 1/10 systematic sample from the alphabetical list of companies. 
The Forbes 500 includes all companies in the top 500 on any of the criteria, and thus has almost 800 companies in the list.", + "download": "https://dasl.datadescription.com/download/data/3125", + "filename": "Companies", + "name": "Companies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "UBS (one of the largest banks in the world) prepared\na report comparing prices, wages, and other economic conditions in cities around the world for it’s international clients. Some of the variables it measured in 73 cities are Cost of Living, Food Costs, Average Hourly Wage, average number of Working Hours per Year, average number of Vacation Days, hours of work (at the average wage) needed to buy an iPhone, minutes of work needed to buy a Big Mac, and Women’s Clothing Cost.", - "download": "https://dasl.datadescription.com/download/data/3405", - "filename": "Prices-Earnings", - "name": "Prices and Earnings", + "description": "Facts about companies selected from the Forbes 500 list for 2000", + "download": "https://dasl.datadescription.com/download/data/3595", + "filename": "Companies-Quickstart", + "name": "Companies Quickstart", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The owner of a small organic food\nstore was concerned about her sales of a specialty yogurt\nmanufactured in Greece. As a result of increasing fuel\ncosts, she recently had to increase its price. 
To help boost\nsales, she decided to place the product on a different shelf\n(near eye level for most consumers) and in a location near\nother popular international products. She kept track of\nsales (number of containers sold per week) for six months\nafter she made the change.", - "download": "https://dasl.datadescription.com/download/data/3410", - "filename": "Product-placement", - "name": "Product placement", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3129", + "filename": "Consumer-spending", + "name": "Consumer spending", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A company is producing and marketing\nnew reading activities for elementary school children that\nit believes will improve reading comprehension scores. A\nresearcher randomly assigns third graders to an eight-week\nprogram in which some will use these activities and others\nwill experience traditional teaching methods. At the end of\nthe experiment, both groups take a reading comprehension\nexam. 
Do these results suggest that the new activities\nare better?", - "download": "https://dasl.datadescription.com/download/data/3411", - "filename": "Product-testing", - "name": "Product testing", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3130", + "filename": "Consumer-spending-post-holiday", + "name": "Consumer spending post holiday", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Productivity 2016", - "download": "https://dasl.datadescription.com/download/data/3409", - "filename": "Productivity-2016", - "name": "Productivity 2016", + "description": "Numbeo.com lists the cost of living (COL) for 576 cities around the world. They report the typical cost of a number of staples. The cost of living is made up of many components. These data report a variety of everyday costs. How are they related? Can an overall cost of living be constructed from them?", + "download": "https://dasl.datadescription.com/download/data/3132", + "filename": "Cost-of-living-2016", + "name": "Cost of living 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "As a class project, students in a large Statistics class collected publicly available information on recent home sales in their hometowns. There are 894 properties. These are not a random sample, but they may be representative of home sales during a short period of time, nationwide. 
Among the variables available is an indication of whether the home was in an urban, suburban, or rural setting.", - "download": "https://dasl.datadescription.com/download/data/3423", - "filename": "Real-Estate", - "name": "Real Estate", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3135", + "filename": "Cost-of-Living-2017", + "name": "Cost of Living 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Real estate sample 1200", - "download": "https://dasl.datadescription.com/download/data/3423", - "filename": "Real-estate-sample-1200", - "name": "Real estate sample 1200", + "description": "Cost of Living Index (Excl. Rent) is a relative indicator of consumer goods prices, including groceries, restaurants, transportation and utilities. Cost of Living Index doesn’t include accommodation expenses such as rent or mortgage. If a city has a Cost of Living Index of 120, it means Numbeo estimates it is 20% more expensive than New York (excluding rent).\nRent Index is an estimation of prices of renting apartments in the city compared to New York City. If Rent index is 80, Numbeo estimates that price of rents in that city is on an average 20% less than the price in New York.\nGroceries Index is an estimation of grocery prices in the city compared to New York City. To calculate this section, Numbeo uses weights of items in the “Markets” section for each city.\nRestaurants Index is a comparison of prices of meals and drinks in restaurants and bars compared to NYC.\nCost of Living Plus Rent Index is an estimation of consumer goods prices including rent comparing to New York City.\nLocal Purchasing Power shows relative purchasing power in buying goods and services in a given city for the average wage in that city. 
If domestic purchasing power is 40, this means that the inhabitants of that city with the average salary can afford to buy on an average 60% less goods and services than New York City residents with an average salary.", + "download": "https://dasl.datadescription.com/download/data/3136", + "filename": "Cost-of-living-2018", + "name": "Cost of living 2018", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Regular gas 2017", - "download": "https://dasl.datadescription.com/download/data/3426", - "filename": "Regular-gas-2017", - "name": "Regular gas 2017", + "description": "The Consumer Price Index (CPI) summarizes the cost of a representative market basket\nof goods that includes groceries, restaurants, transportation, utilities, and medical\ncare. Global companies often use the CPI to determine living allowances and salaries\nfor employees. Inflation is often measured by how much the CPI changes from year to\nyear. Relative CPIs can be found for different cities. We have data giving CPI components\nrelative to New York City. 
For New York City, each index is 100(%).", + "download": "https://dasl.datadescription.com/download/data/3139", + "filename": "CPI-Worldwide-2016", + "name": "CPI Worldwide 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Retail trade index", - "download": "https://dasl.datadescription.com/download/data/3427", - "filename": "Retail-trade-index", - "name": "Retail trade index", + "description": "A credit card company wants to see how much customers in a particular segment of\ntheir market use their credit card. They have provided data on the amount\nspent by 500 selected customers during a 3-month period and have asked you to\nsummarize the expenditures. (Data are realistic, but disguised for confidentiality.)", + "download": "https://dasl.datadescription.com/download/data/3146", + "filename": "Credit-card-charges", + "name": "Credit card charges", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A sample from Fortune 500 companies", - "download": "https://dasl.datadescription.com/download/data/3434", - "filename": "Sales-profits", - "name": "Sales and profits", + "description": "Peninsula Creameries sells both cottage cheese and ice cream. 
The CEO recently noticed that in months when the company sells more cottage cheese, it seems to sell more ice cream as well.", + "download": "https://dasl.datadescription.com/download/data/3152", + "filename": "Dairy-sales", + "name": "Dairy sales", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear. ", - "download": "https://dasl.datadescription.com/download/data/3437", - "filename": "Saratoga-house-prices", - "name": "Saratoga house prices", + "description": "Data on raw diamonds from the internet. Price of a diamond depends on its Carat weight, color, clarity, and cut. The data are for 2690 diamonds of a variety of weights, colors, clarity, and cut. What predicts the price? Do the variables need to be reexpressed?", + "download": "https://dasl.datadescription.com/download/data/3161", + "filename": "Diamonds_", + "name": "Diamonds", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear. ", - "download": "https://dasl.datadescription.com/download/data/3436", - "filename": "Saratoga-houses", - "name": "Saratoga houses", + "description": "The Dow Jones stock index measures the performance of the stocks of America’s largest companies. 
A regression of the Dow prices on years 1972–2015 appears to be successful, but the residuals raise some questions.", + "download": "https://dasl.datadescription.com/download/data/3176", + "filename": "Dow-Jones-2015", + "name": "Dow Jones 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A group of Statistics students cut ads out of magazines. They were careful to find two ads for each of 10 similar items, one with a sexual image and one without. They arranged the ads in random order and had 39 subjects look at them for one minute. Then they asked the subjects to list as many of the products as they could remember. Their data are shown in the table. Is there evidence that the sexual images mattered?", - "download": "https://dasl.datadescription.com/download/data/3444", - "filename": "Sex-sells", - "name": "Sex sells", + "description": "Quarterly e-commerce retail sales (in millions of dollars) in the United States from 1999 to 2008 ", + "download": "https://dasl.datadescription.com/download/data/3180", + "filename": "E-commerce", + "name": "E-commerce", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Researchers studying how a car’s fuel efficiency (in Miles Per Gallon) varies with its Speed drove a compact car 200 miles at various speeds on a test track. Their data are shown in the table. 
", - "download": "https://dasl.datadescription.com/download/data/3454", - "filename": "Slower-is-cheaper", - "name": "Slower is cheaper", + "description": "When implementing a packaged\nEnterprise Resource Planning (ERP) system, many companies\nreport that the module they first install is Financial\nAccounting. Among the measures used to gauge the\neffectiveness of their ERP system implementation is acceleration\nof the financial close process. The data hold a sample of\n8 companies that report their average time (in weeks) to\nfinancial close before and after the implementation of their\nERP system.", + "download": "https://dasl.datadescription.com/download/data/3191", + "filename": "ERP-Effectiveness", + "name": "ERP Effectiveness", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the federal rate on 3-month Treasury bills from 1950 to 1980 and Years Since 1950. ", - "download": "https://dasl.datadescription.com/download/data/3477", - "filename": "TBill-rates-2016", - "name": "TBill rates 2016", + "description": "Sales (in $) for one week were collected for 18 stores in a food store chain in the northeastern United States. The stores and the towns they are located in vary in size.", + "download": "https://dasl.datadescription.com/download/data/3213", + "filename": "Food-sales", + "name": "Food sales", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Tiffany was founded in 1837, when Charles Lewis Tiffany opened his first store in downtown Manhattan. Tiffany retails and distributes a selection of Tiffany & Co. 
brand jewelry at a range of prices. Today, more than 150 Tiffany & Co. stores sell to customers in U.S. and international markets.\nThe dataset holds quarterly sales data from 2005 through the middle of 2017. The data are suitable for time series modeling.", - "download": "https://dasl.datadescription.com/download/data/3482", - "filename": "Tiffany", - "name": "Tiffany 2017", + "description": "The U.S. government provides fuel economy (in miles per gallon) and other information about late model cars sold in the US. How would you model the relationship between fuel economy and engine displacement (in liters)? Are there any cars that don’t fit the model? Can you explain why? ", + "download": "https://dasl.datadescription.com/download/data/3225", + "filename": "Fueleconomy-2016", + "name": "Fuel economy 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Time on market", - "download": "https://dasl.datadescription.com/download/data/3483", - "filename": "Time-on-market", - "name": "Time on market", + "description": "Weekly gas prices for regular gas in the United States as reported by the U.S. Energy Information Administration for 2009 through August 2016 ", + "download": "https://dasl.datadescription.com/download/data/3232", + "filename": "Gas-prices-2016", + "name": "Gas prices 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Are people who use tobacco products more likely to consume alcohol? Here are data on household spending (in pounds) taken by the British government on 11 regions in Great Britain. 
Do tobacco and alcohol spending appear to be related? What questions do you have about these data? What conclusions can you draw? ", - "download": "https://dasl.datadescription.com/download/data/3485", - "filename": "Tobacco-and-alcohol", - "name": "Tobacco and alcohol", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3233", + "filename": "Gas-Prices-2017", + "name": "Gas Prices 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Daily closing stock prices for Toyota Motor Manufacturing from April 1, 2008, through June 21, 2013 ", - "download": "https://dasl.datadescription.com/download/data/3491", - "filename": "Toyota-stock-prices-2013", - "name": "Toyota stock prices 2013", + "description": "Monthly gas prices for all grades and all formulations ($/gallon) in the United States as reported by the U.S. Energy Information Administration for 1993 through August 2018. Prices are available at the site for all weeks. Data here are for the final week of each month.", + "download": "https://dasl.datadescription.com/download/data/3234", + "filename": "Gas-prices-2018", + "name": "Gas prices 2018", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "US Unemployment rate from 1/1/2003 to 8/1/17. ", - "download": "https://dasl.datadescription.com/download/data/3507", - "filename": "Unemployment-2017", - "name": "Unemployment 2017", + "description": "Many drivers of cars that can run on regular gas actually buy premium in the belief that they will get better gas mileage. 
To test that belief, we use 10 cars from a company fleet in which all the cars run on regular gas. Each car is filled first with either regular or premium gasoline, decided by a coin toss, and the mileage for that tankful is recorded. Then the mileage is recorded again for the same cars for a tankful of the other kind of gasoline. We don’t let the drivers know about this experiment.", + "download": "https://dasl.datadescription.com/download/data/3235", + "filename": "Gas-prices-monthly", + "name": "Gas prices monthly", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Kelly's Blue Book: https://www.kbb.com/cars-for-sale/ accessed on 31 Aug 2017 using zip code 94305 200 mile radius BMW M5", - "download": "https://dasl.datadescription.com/download/data/3508", - "filename": "Used-BMW", - "name": "Used BMW M5 2017", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3238", + "filename": "GDP-state", + "name": "GDP by state", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "How does the age of a used car influence its price? This is a small enough data set to find a model with a calculator. 
", - "download": "https://dasl.datadescription.com/download/data/3509", - "filename": "Used-cars", - "name": "Used cars 2014", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3239", + "filename": "GDP-growth-2017", + "name": "GDP growth 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The web site www.autotrader.com lists cars for sale. On January 22 2017,\nit listed 55 used Honda Civics for sale by owner. From those listings, we extracted the asking price ($), the mileage, and the model year (from which we computed the age of the car at the time the data were collected\nQuestions include how to best predict the price from mileage and age and whether any of the cars is a particularly good buy.\nOne care is a particularly old (1989) car that has relatively low mileage for such an old car. The seller claims it hasn’t been driven for several years. \nIt looks like Price might benefit from re-expression by logs.", - "download": "https://dasl.datadescription.com/download/data/3510", - "filename": "Used-Civics", - "name": "Used Civics 2017", + "description": "Daily opening and closing stock prices (adjusted for splits and dividends) for Google, Inc. from Aug 19, 2004 through June 21, 2013 ", + "download": "https://dasl.datadescription.com/download/data/3247", + "filename": "Google-stock-prices", + "name": "Google stock prices", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the gross domestic product (GDP) of the United States in trillions of 2009 dollars and time. 
", - "download": "https://dasl.datadescription.com/download/data/3511", - "filename": "USGDP-2016", - "name": "USGDP 2016", + "description": "A graphite manufacturer makes long\nrolls of flexible graphite to be used to seal components in\ncombustion engines. The specifications state that the mean\nstrength should be 21.2 ounces per square yard with a\nstandard deviation of 0.29. Further specifications state that\nno roll should have strength less than 20.2 or more than\n22.2 ounces per square yard. If there is a defect in terms\nof the strength of the graphite rolls, the seal will not hold.\nAfter the roll is created, a beta scanner takes readings of\nthe basis weight in ounces per square yard. The data is\nseparated into 10 lanes with 20 scans in each lane. A sample\nconsists of one roll from each lane. The results from 20\nsamples follow are in the data.", + "download": "https://dasl.datadescription.com/download/data/3250", + "filename": "Graphite-production", + "name": "Graphite production", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Walmart revenue", - "download": "https://dasl.datadescription.com/download/data/3514", - "filename": "Walmart-revenue", - "name": "Walmart revenue", + "description": "WinCo Foods, a large discount grocery\nretailer in the western United States, promotes itself as the lowest priced grocery retailer. In newspaper ads WinCo Foods published a price comparison for products between WinCo and several competing grocery retailers. One of the retailers compared against WinCo was Walmart, also known as a low price competitor. WinCo selected a variety of products, listed the price of the product charges at each retailer, and showed the sales receipt to prove the prices at WinCo were the lowest in the area. 
A sample of the products and their price comparison at both WinCo and Walmart are given.", + "download": "https://dasl.datadescription.com/download/data/3251", + "filename": "Grocery-prices", + "name": "Grocery prices", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Gallup Poll of 1015 U.S. adults on April 9 – 12, 2015. Respondents were classified as high income (over $75,000), middle income ($30k–$75k), or low income (less than $30k). Those polled were asked for their views on redistributing U.S. wealth by heavily taxing the rich. Counts are reconstructed from percentages published by Gallup. ", - "download": "https://dasl.datadescription.com/download/data/3518", - "filename": "Wealth-Redistribution", - "name": "Wealth Redistribution", + "description": "Health expenditures", + "download": "https://dasl.datadescription.com/download/data/3260", + "filename": "Health-expenditures", + "name": "Health expenditures", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Quarterly sales of Whole Foods Markets from 1995 through 2016. Whole Foods was purchased by Amazon in 2017, so 2016 is the final complete year prior to the merger. The data show a strong seasonal component even though food sales should not be seasonal. ", - "download": "https://dasl.datadescription.com/download/data/3522", - "filename": "Whole-Foods", - "name": "Whole Foods 2016", + "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. 
", + "download": "https://dasl.datadescription.com/download/data/3266", + "filename": "Historica-Oil-Prices-2016", + "name": "Historical Oil Prices 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Wine production", - "download": "https://dasl.datadescription.com/download/data/3529", - "filename": "Wine-production", - "name": "Wine production", + "description": "Holiday shopping", + "download": "https://dasl.datadescription.com/download/data/3267", + "filename": "Holiday-shopping", + "name": "Holiday shopping", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Quarterly median weekly earnings for U.S. women 25 years of age or older. Data are provided from the first quarter of 2003 through the first quarter of 2013. 
", - "download": "https://dasl.datadescription.com/download/data/3535", - "filename": "Women-earnings", - "name": "Women’s weekly earnings 2013", + "description": "Holiday spending", + "download": "https://dasl.datadescription.com/download/data/3268", + "filename": "Holiday-spending", + "name": "Holiday spending", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Youth Unemployment 2016", - "download": "https://dasl.datadescription.com/download/data/3546", - "filename": "Youth-Unemployment-2016", - "name": "Youth Unemployment 2016", + "description": "Home depot sales", + "download": "https://dasl.datadescription.com/download/data/3269", + "filename": "Home-depot-sales", + "name": "Home depot sales", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "http://jse.amstat.org/datasets/aaup.txt", - "download": "http://jse.amstat.org/datasets/aaup.dat.txt", - "filename": "AAUP", - "name": "AAUP Faculty Salary data", + "description": "Home Price Index 2017", + "download": "https://dasl.datadescription.com/download/data/3270", + "filename": "Home-Price-Index-2017", + "name": "Home Price Index 2017", "number_format": 31, "remove_quotes": true, - "separator": ",", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The dataset bestbuy.day contains monthly data on computer usage \n(MIPS) and total number of stores from August 1996 
to July 2000. \nAdditionally, information on the planned number of stores through \nDecember 2001 is available. These data can be used to compare \ntime-series forecasting with trend and seasonality components and \ncausal forecasting based on simple linear regression. The simple \nlinear regression model exhibits unequal error variances, suggesting \na transformation of Y.", - "download": "http://jse.amstat.org/datasets/bestbuy.dat.txt", - "filename": "Best_Buy", - "name": " BestBuy", + "description": "House prices and properties in New York. What properties of a house can predict its price? Can we use such a model to identify houses that are extraordinarily expensive or inexpensive? ", + "download": "https://dasl.datadescription.com/download/data/3275", + "filename": "Housing-prices", + "name": "Housing prices", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The dollar amount for a monthly (January 1991 through December 2000) \nhousehold electric bill is presented as a time series. In addition, \npotential explanatory variables are included. Twelve representative \nmonthly values are provided for the average temperature, for \nheating degree days, and for cooling degree days (not for each \nmonth for each year). Additional variables give the family size \neach month and indicate when a new electric meter and new heating \nand cooling equipment was installed. To convert the billing amount \nto estimated power consumption, a tiered rate function (supplied \nin the accompanying Instructor's Manual) and the costs of \nassociated riders (provided here) must be used. 
Consumption \nestimates resulting from this information are supplied.\t", - "download": "http://jse.amstat.org/datasets/electricbill.dat.txt", - "filename": "electricbill", - "name": " Electric Bill Data", + "description": "House prices and properties in New York. What properties of a house can predict its price? Can we use such a model to identify houses that are extraordinarily expensive or inexpensive? ", + "download": "https://dasl.datadescription.com/download/data/3276", + "filename": "Housing-prices-GE19", + "name": "Housing prices GE19", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data file contains information on 76 single-family homes inEugene, Oregon during 2005. This dataset is suitable for a completemultiple linear regression analysis of home price data that coversmany of the usual regression topics, including interaction andpredictor transformations. Whereas realtors use experience and localknowledge to subjectively value a house based on its characteristics(size, amenities, location, etc.) and the prices of similar housesnearby, regression analysis can provide an alternative that moreobjectively models local house prices using these same data.SOURCES:The data were provided by Victoria Whitman, a realtor in Eugene, in2005. 
The data were used in a case study in Pardoe (2006).", - "download": "http://jse.amstat.org/datasets/homes76.dat.txt", - "filename": "homes76", - "name": " Modeling home prices using realtor data", + "description": "How are housing costs related to median family income?", + "download": "https://dasl.datadescription.com/download/data/3283", + "filename": "Income-housing", + "name": "Income and housing", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": " This file contains total US gross box office receipts for 49 movies. This data is to accompany the article entitled Movie Data.", - "download": "http://jse.amstat.org/datasets/movietotal.dat.txt", - "filename": "movietotal", - "name": "movietotal", + "description": "Income vs Hours 2013", + "download": "https://dasl.datadescription.com/download/data/3286", + "filename": "Income-vs-Hours-2013", + "name": "Income vs Hours 2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "For 97 countries in the world, data are given for birth rates, death\nrates, infant death rates, life expectancies for males and females, and\nGross National Product.\n\nVARIABLE DESCRIPTIONS:\nColumns\n 1 - 6 Live birth rate per 1,000 of population\n 7 - 14 Death rate per 1,000 of population\n15 - 22 Infant deaths per 1,000 of population under 1 year old\n23 - 30 Life expectancy at birth for males\n31 - 38 Life expectancy at birth for females\n39 - 46 Gross National Product per capita in U.S. 
dollars \n47 - 52 Country Group\n 1 = Eastern Europe\n 2 = South America and Mexico\n 3 = Western Europe, North America, Japan, Australia, New Zealand\n 4 = Middle East\n 5 = Asia\n 6 = Africa\n53 - 74 Country", - "download": "http://jse.amstat.org/datasets/poverty.dat.txt", - "filename": "poverty", - "name": "The Statistics of Poverty and Inequality ", + "description": "The U.S. Consumer Price Index and year, every 5 years since 1916. These are the values for January of each year. What is the trend? Can we model it with a linear regression? ", + "download": "https://dasl.datadescription.com/download/data/3291", + "filename": "Inflation-2016", + "name": "Inflation 2016", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Some statistics on the fishing fleet and commercial catch, for each year\nbetween 1960 and 1982. The six columns are:\n\n 1. year\n 2. number of vessels registered for fishing\n 3. number of crab caught\n 4. total weight in kilograms of crab caught\n 5. total number of pot-lifts.\n 6. 
wholesale price of king crab in dollars per pound", - "download": "http://lib.stat.cmu.edu/crab/fleet", - "filename": "fleet", - "name": "fishing fleet and commercial catch", + "description": "Average annual interest rates (banks prime lending) in the United States from 1966 through 2009 ", + "download": "https://dasl.datadescription.com/download/data/3296", + "filename": "Interest-rates-2009", + "name": "Interest rates 2009", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Commercial catch data for 1960-1982, broken out by district. The four columns\nare:\n\n 1. year\n 2. district number (1, 2, 3 or 4)\n 3. total catch as a count\n 4. total catch in kilograms", - "download": "http://lib.stat.cmu.edu/crab/catch", - "filename": "catch", - "name": "Commercial catch data", + "description": "The amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? Consider a rotating plot of interest rate, mortgage total, and year. 
", + "download": "https://dasl.datadescription.com/download/data/3297", + "filename": "Interest-mortgage", + "name": "Interest rates and mortgages", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Economics" - }, - { - "datasets": [ + "use_first_row_for_vectorname": true + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) 
", - "download": "http://www.statsci.org/data/oz/rugby.txt", - "filename": "rugby", - "name": "Time of Passages of Play in Rugby", + "description": "The amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? Consider a rotating plot of interest rate, mortgage total, and year. ", + "download": "https://dasl.datadescription.com/download/data/3298", + "filename": "Interest-mortgage-2015", + "name": "Interest rates and mortgages 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Mark Taylor was Captain of the Australian test cricket team from May 1994 until February 1999. By the middle of 1997, the Australian team has won its 7 consecutive international test series, making Taylor the most successful Australian Captain in history. However his poor batting form from mid 1996 to mid 1997 gave the Australian selectors a dilemma in deciding whether his excellent Captaincy made up for the run of poor scores off his own bat. \nThe data below gives Mark Taylor's test scores from the middle of 1989 to the middle of 1995, a period over which he was batting well. Scores were made in Australia's first or second innings of each match. Sometimes Australia was not required to bat twice, in which case the second innings is marked as missing. There are also a number of `not outs'.", - "download": "http://www.statsci.org/data/oz/taylor.txt", - "filename": "taylor_", - "name": "Mark Taylor's Test Cricket Scores", + "description": "This example is based on 1998 case study written by J. Hunt, E. Landry, and J. Rao as part of the Babson College case series. 
The data and setting used in this example are based on the actual case study, but the data have been modified and the conclusions are fictitious.", + "download": "https://dasl.datadescription.com/download/data/3308", + "filename": "Komtek-Technologies", + "name": "Komtek Technologies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Sydney-Hobart yacht race starts from Sydney Harbour on Boxing day (December 26) and finishes several days later in Hobart. It is a 630 nautical mile ocean race. The data give the winning times from 1945 to 1993, as they appeared in the Sydney Morning Herald on 24 December, 1994, plus the winning times for 1994 to 1997. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYacht\n\nName of winning yacht\n\nYear\n\nYear\n\nDays\n\nDays unit of winning time\n\nHours\n\nHours unit of winning time\n\nMinutes\n\nMinutes unit of winning time\n\nTime\n\nWinning time in minutes (should match time in Days, Hours and Minutes)\n", - "download": "http://www.statsci.org/data/oz/sydhob.txt", - "filename": "sydhob", - "name": "Sydney to Hobart Yacht Race Winning Times", + "description": "Real estate agents want to set correctly\nthe price of a house that’s about to go on the real estate\nmarket. They must choose a price that strikes a balance\nbetween one that is so high that the house takes too long\nto sell and one that’s so low that not enough value will go\nto the homeowner. One appraisal method is the “Comparative\nMarket Analysis” approach by which the market\nvalue of a house is based on recent sales of similar homes\nin the neighborhood. 
Because no two houses are exactly\nthe same, appraisers have to adjust comparable homes for\nsuch features as extra square footage, bedrooms, fireplaces,\nupgrading, parking facilities, swimming pool, lot size, location,\nand so on. The appraised market values and the selling\nprices of 45 homes from the same region are given.", + "download": "https://dasl.datadescription.com/download/data/3328", + "filename": "Market-value", + "name": "Market value", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Winning heights or distances (inches) for the High Jump, Discus and Long Jump events at the Olympics up to 1996. ", - "download": "http://www.statsci.org/data/general/olympic.txt", - "filename": "olympic", - "name": "Olympic Records for High Jump, Discus and Long Jump", + "description": "Marketing managers salaries", + "download": "https://dasl.datadescription.com/download/data/3327", + "filename": "Marketing-managers-salaries", + "name": "Marketing managers salaries", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the number of medals won by each medal-winning country in the 1992 Summary Olympic Games in Barcelona, Spain, and the 1994 Winter Olympic Games in Lillehammer, Norway. Also given is the population and latitude of each country. Griffiths et al write: \n... the media spent a lot of time discussing the number of medals won by each country's athletes. The implication was that the comparison was of some importance. 
However, larger countries would be expected to win more medals than smaller countries, simply because of their larger populations. \n... some viewers, especially those from the smaller countries, felt that the number of medals should be standardised to account for the very wide range of populations, and that a per capita number of medals for a country was a fairer comparison. Others felt that this was unfair to the countries with larger populations - that having twice as many people did not lead to twice as many medals. If standardisation is performed adequately, there should be no systematic relationship between the adjusted medal count and population. \nAlso countries further from the equator might be expected to do better in the winter olympics. \nThe data is incomplete in that countries with no medals are not included. These would be mostly smaller population countries. ", - "download": "http://www.statsci.org/data/oz/medals.txt", - "filename": "medals", - "name": "Olympic Medals", + "description": "Quarterly median weekly earnings from the first quarter of 2003 through the first quarter of 2013 for men, 25 years of age or older, in the United States ", + "download": "https://dasl.datadescription.com/download/data/3336", + "filename": "Men-weekly-earnings-2013", + "name": "Men’s weekly earnings 2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This data set was assembled by Rowan Todd and Mark McNaughton, two students studying Statistics at QUT in a class taught by Dr Margaret Mackisack. For a class project they decided to investigate the effect on football game attendance of various covariates. They collected data involving Saturday Australian Football League (AFL) matches at the Melbourne Cricket Ground (MCG). 
They looked only at matches during the normal home and away season (i.e. not including finals). They used statistics from all such games in 1993 and 1994 (nineteen relevant matches in 1993 and twenty-two in 1994). The response variable measured was attendance at the MCG, and after consideration, they came up with the following covariates: \n\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMCG \n\nAttendance at the MCG in 1000's.\n\nTemp \n\nTemperature. The forecast maximum temperature on the day of the match, in whole degrees C, found in The Weekend Australian.\n\nOther\n\nAttendance at other matches in 1000's. The sum of the attendances at other AFL matches in Melbourne and Geelong on the same day as the match in question.\n\nMembers\n\nMembership. The sum of the memberships of the two clubs whose teams were playing the match in question in 1000's.\n\nTop50\n\nNumber of players from the top fifty. The number of players in the top 50 in the AFL who happened to be playing in the match in question.\n\nDate\n\nDate of the match in the format dd/mm/yy.\n\nHome\n\nAbbreviation for home team.\n\nAway\n\nAbbreviation for away team.\n", - "download": "http://www.statsci.org/data/oz/afl.txt", - "filename": "afl", - "name": "AFL Crowd Attendance at the MCG", + "description": "Movie budgets", + "download": "https://dasl.datadescription.com/download/data/3347", + "filename": "Movie-budgets", + "name": "Movie budgets", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. 
Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) \n", - "download": "http://www.statsci.org/data/oz/rugby.txt", - "filename": "rugby_", - "name": "Time of Passages of Play in Rugby", + "description": "Does money purchase a good movie? Is the US Gross revenue related to either the budget or the Rotten Tomatoes score? The dataset holds data on 609 recent releases that includes the USGross (in $M), the Budget ($M), the Run Time (minutes), and the score given by the critics on the Rotten Tomatoes website. ", + "download": "https://dasl.datadescription.com/download/data/3349", + "filename": "Movie-profits", + "name": "Movie profits", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the record-winning times for 35 hill races in Scotland, as reported by Atkinson (1986). 
The distance travelled and the height climbed in each race is also given. The data contains a known error - Atkinson (1986) reports that the record for Knock Hill (observation 18) should actually be 18 minutes rather than 78 minutes. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRace\n\nName of race\n\nDistance\n\nDistance covered in miles\n\nClimb\n\nElevation climbed during race in feet\n\nTime\n\nRecord time for race in minutes\n", - "download": "http://www.statsci.org/data/general/hills.txt", - "filename": "hills_", - "name": "Scottish Hill Races", + "description": "Mutual fund flows", + "download": "https://dasl.datadescription.com/download/data/3354", + "filename": "Mutual-fund-flows", + "name": "Mutual fund flows", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Investigators studied physical characteristics and ability in 13 football punters. Each volunteer punted a football ten times. The investigators recorded the average distance for the ten punts, in feet. They also recorded the average hang time (time the ball is in the air before the receiver catches it) for the ten punts, in seconds. In addition, the investigators recorded five measures of strength and flexibility for each punter: right leg strength (pounds), left leg strength (pounds), right hamstring muscle flexibility (degrees), left hamstring muscle flexibility (degrees), and overall leg strength (foot-pounds). From the study \"The relationship between selected physical performance variables and football punting ability\" by the Department of Health, Physical Education and Recreation at the Virginia Polytechnic Institute and State University, 1983. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in feet\n\nHang\n\nTime in air in seconds\n\nR_Strength\n\nRight leg strength in pounds\n\nL_Strength\n\nLeft leg strength in pounds\n\nR_Flexibility\n\nRight leg flexibility in degrees\n\nL_Flexibility\n\nLeft leg flexibility in degrees\n\nO_Strength\n\nOverall leg strength in pounds\n", - "download": "http://www.statsci.org/data/general/punting.txt", - "filename": "punting", - "name": "American Football Punters", + "description": "On December 30, 2016, the Standard and Poor’s (S&P) 500 index hit an all-time high. During 2016, the S&P returned 12.25%. Here is a histogram of the 2016 net returns (total return – annual expenses) for Money Magazine’s 50 Best Mutual Funds and ETFs. The net returns are computed from the data given by Money Magazine.", + "download": "https://dasl.datadescription.com/download/data/3353", + "filename": "Mutual-funds-2016", + "name": "Mutual funds 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Data on 102 male and 100 female athletes collected at the Australian Institute of Sport, courtesy of Richard Telford and Ross Cunningham. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSport\n\nSport\n\nSex\n\nmale or female\n\nHt\n\nHeight in cm\n\nWt\n\nWeight in kg\n\nLBM\n\nLean body mass\n\nRCC\n\nRed cell count\n\nWCC\n\nWhite cell count\n\nHc\n\nHematocrit\n\nHg\n\nHemoglobin\n\nFerr\n\nPlasma ferritin concentration\n\nBMI\n\nBody mass index = weight/height^2\n\nSSF\n\nSum of skin folds\n\n%Bfat\n\n% body fat\n\n\n\n", - "download": "http://www.statsci.org/data/oz/ais.txt", - "filename": "ais_", - "name": "Australian Institute of Sport", + "description": "A study by the U.S. 
Small\nBusiness Administration used historical data to model the\nGDP per capita of 24 of the countries in the Organization\nfor Economic Cooperation and Development (OECD). The researchers hoped to show that more regulation leads to lower GDP/Capita. The multiple regression with all terms does have a significant P-value for Economic Regulation Index.\nHowever, Primary Education is not a significant predictor. If it is removed from the model, then OECD Regulation is no longer significant at .05. Was it added to the model just to nudge the P-value of OECD regulation down to permit a publication that claimed an effect?\nCheck to see whether you think there is such an effect.", + "download": "https://dasl.datadescription.com/download/data/3373", + "filename": "OECD-economic-regulations", + "name": "OECD economic regulations", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data was collected by Grant Elliott, a statistics student at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. Here is his description of the data and its collection: \nLiving at a squash court spurred on the idea of this experiment. Frustrated playing squash one night, I thought that the squash ball I was playing with seemed to bounce and react differently to what I was previously used to. So I conducted this experiment on the squash ball, looking at the type of ball, temperature of the ball and the age of the ball. \nBall type: In this experiment I used a 'yellow dot' squash ball and a 'double x' squash ball. A 'yellow dot' is super slow and a 'double x' is termed extra super slow. \nTemperature: When playing with a squash ball it tends to heat up. So I took it to extremes where I had 'room temperature' and 'playing temperature'. 
To duplicate 'playing temperature' the ball was placed in a cup of boiling water for 45 sec. \nAge: I expected age to be my most significant factor. Squash balls, being a sealed ball, shouldn't vary when they get older, so I used a new ball and compared it to an old ball. \nProcedure: I first thought of dropping the balls from a set height and seeing how far they bounced against a tape measure. This idea was scrapped as too much error came into it because you couldn't accurately measure when the maximum height of the bounce was. I then thought of a ball machine. I set the ball machine up and measured how far back did the ball come off the front wall when shot out of the ball machine. This eliminated a lot of varying in my figures as the ball machine shoots the balls out at roughly the same speed and trajectory. It doesn't take all the varying out as I wouldn't know whether the ball machine does shoot it out at exactly the same speed, but it keeps variation to a minimum. \nCriticism: Measuring the distance from the wall was done by my friend and I. We both would watch from different angles and would see where the ball landed. This means our figures are probably out by a couple of centimetres. When the balls were dropped into the water I forgot to take some of them out after 45 sec. Also with some I moved them around in the water to get the heat distributed evenly but others I forgot to move as I was collecting and organising the next ball. Another criticism is the temperature of the water. I put new boiling water into the cup after 4 balls had been in it. 
Therefore the last ball to go in wouldn't be the same temperature as the first ball.", - "download": "http://www.statsci.org/data/oz/squash.txt", - "filename": "squash", - "name": "Squash Ball Experiment", + "description": "OECD GDP", + "download": "https://dasl.datadescription.com/download/data/3374", + "filename": "OECD-GDP", + "name": "OECD GDP", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The dynamic and repetitive nature of running means that runners are particularly prone to over-use injuries such as lower back pain. Chronic pain is often caused by muscle imbalances, which result in faulty alignment of and abnormal stresses applied to the spinal column. Muscle imbalances originate as adapations in motor control due to pain or external stimuli, and are then reinforced and preserved by repetition. \nThis study, conducted by Physiotherapy student Andrew Mooney, examined the flexibility of four major muscle groups associated with movement of the hip, with particular attention to imbalances between the left and right sides or between the dominant and non-dominant sides. \nA total of 33 male subjects were included in the study. The subjects were divided into three groups: 11 runners with low back pain, 11 runners without low back pain and 11 sedentary individuals without low back pain. (Runners were recruited from the Ashgrove and Toowong athletics clubs, non runners from the University of Queensland and the general community. Runners with lower back pain were recruited first. Once this subject group was tested, subjects for the two control groups were recruited to match the runners with low back pain according to age, height and weight.) 
\nThe muscle groups examined were \nthe iliopsoas, \nthe rectus femoris, \nthe tensor fascia lata/iliotibial band (ITB/TFL), and \nthe hamstrings \nFor each muscle group, two measures of flexibility were used. The first, relative flexibility, was related to the range of movement of the joint before postural compensations occurred, and the second was a measure of the maximal functional length of the muscle. Relative flexibility and functional length were measured for each muscle group on both the left and right sides of the body. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject number, 1 - 33\n\nGroup\n\nPain, NoPain or Sedentary\n\nMatch\n\n1 - 11, indicating matched triples\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight of subject in cm\n\nWeight\n\nWeight of subject in kg\n\nDistance\n\nType of running event: Sprint, middle distance (Mid) or long distance (Long)\n\nYears\n\nNumber of years running\n\nDominant\n\nDominant side, Left or Right\n\nDF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on dominant side in degrees. 
Positive numbers indicate above the horizontal, negative numbers below the horizontal.\n\nDF.Rectus\n\nRelative flexibility of rectus femoris muscle on dominant side in degrees\n\nDF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on dominant side in degrees\n\nDF.Hamstring\n\nRelative flexibility of hamstring muscles on dominant side in degrees\n\nDL.Iliopsoas\n\nFunctional length of iliopsoas muscle on dominant side\n\nDL.Rectus\n\nFunctional length of rectus femoris muscle on dominant side\n\nDL.ITBTFL\n\nFunctional length of ITB/TFL muscle on dominant side\n\nDL.Hamstring\n\nFunctional length of hamstring muscles on dominant side\n\nNF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on nondominant side in degrees\n\nNF.Rectus\n\nRelative flexibility of rectus femoris muscle on nondominant side in degrees\n\nNF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on nondominant side in degrees\n\nNF.Hamstring\n\nRelative flexibility of hamstring muscles on nondominant side in degrees\n\nNL.Iliopsoas\n\nFunctional length of iliopsoas muscle on nondominant side\n\nNL.Rectus\n\nFunctional length of rectus femoris muscle on nondominant side\n\nNL.ITBTFL\n\nFunctional length of ITB/TFL muscle on nondominant side\n\nNL.Hamstring\n\nFunctional length of hamstring muscles on nondominant side\n", - "download": "http://www.statsci.org/data/oz/backpain.txt", - "filename": "backpain", - "name": "Runners with Low Back Pain", + "description": "OECD GDP Growth", + "download": "https://dasl.datadescription.com/download/data/3375", + "filename": "OECD-GDP-Growth", + "name": "OECD GDP Growth", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data come from the 1990 Pilot Surf/Health Study of NSW Water Board. 
The first column takes values 1 or 2 according to the recruit's perception of whether (s)he is a Frequent OCean Swimmer, the second column has values 1 or 4 according to recruit's usually chosen swimming location (1 for non-beach, 4 for beach), the third column has values 2 (aged 15-19), 3 (aged 20-25), or 4 (aged 25-29), the fourth column has values 1 (male) or 2 (female) and finally, the fifth column has the number of self-diagnosed ear infections that were reported by the recruit.", - "download": "http://www.statsci.org/data/oz/earinf.txt", - "filename": "earinf", - "name": "Ear Infections in Swimmers", + "description": "OECD Unemployment", + "download": "https://dasl.datadescription.com/download/data/3376", + "filename": "OECD-Unemployment", + "name": "OECD Unemployment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": " Alex Rodriguez (known to fans as A-Rod)was the youngest player ever to hit 500 home runs. The file holds the number of home runs hit by A-Rod during the 1994–2016 seasons. Describe the distribution, mentioning its shape and any unusual features. ", - "download": "https://dasl.datadescription.com/download/data/3038", - "filename": "a-rod-2016", - "name": "A-Rod 2016", + "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. The data include both the inflation-adjusted prices of a barrel of oil from 1968 to 2016 and two prediction models. 
", + "download": "https://dasl.datadescription.com/download/data/3377", + "filename": "Oil-prices-2016", + "name": "Oil prices 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In Olympic Archery both men and women start with a field of 64 qualifiers. Each archer shoots a round of 72 arrows (total possible score: 720) to establish a seeding position. Then they participate in a single-elimination contest. Thus, the seeding round is the only one that provides data for all archers (because some are […] ", - "download": "https://dasl.datadescription.com/download/data/3056", - "filename": "Archery", - "name": "Archery", + "description": "Online Shopping", + "download": "https://dasl.datadescription.com/download/data/3384", + "filename": "Online-Shopping", + "name": "Online Shopping", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "American League baseball games are played under the designated hitter rule, meaning that pitchers, often weak hitters, do not come to bat. Baseball owners believe that the designated hitter rule means more runs scored, which in turn means higher attendance. Is there evidence that more fans attend games if the teams score more runs? The […] ", - "download": "https://dasl.datadescription.com/download/data/3057", - "filename": "Attendance-2016", - "name": "Attendance 2016", + "description": "Sales volume and price of a slice of plain pizza ($) in Baltimore, Dallas, Chicago, and Denver for 156 weeks. How are prices and sales volumes related? Are patterns the same across cities? 
", + "download": "https://dasl.datadescription.com/download/data/3395", + "filename": "Pizza-prices", + "name": "Pizza prices", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "It has been suggested that children born in the summer have an advantage over their peers when it comes to sports, perhaps because they can be outdoors when they are young. The data report the number of professional ballplayers born in each month of the year for one season of professional baseball. ", - "download": "https://dasl.datadescription.com/download/data/3060", - "filename": "Ballplayer-births", - "name": "Ballplayer births", + "description": "Poverty and Region 2015", + "download": "https://dasl.datadescription.com/download/data/3403", + "filename": "Poverty-and-Region-2015", + "name": "Poverty and Region 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3063", - "filename": "Baseball-attendance", - "name": "Baseball attendance", + "description": "UBS (one of the largest banks in the world) prepared\na report comparing prices, wages, and other economic conditions in cities around the world for its international clients. 
Some of the variables it measured in 73 cities are Cost of Living, Food Costs, Average Hourly Wage, average number of Working Hours per Year, average number of Vacation Days, hours of work (at the average wage) needed to buy an iPhone, minutes of work needed to buy a Big Mac, and Women’s Clothing Cost.", + "download": "https://dasl.datadescription.com/download/data/3405", + "filename": "Prices-Earnings", + "name": "Prices and Earnings", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3064", - "filename": "Baseball-circumferences", - "name": "Baseball circumferences", + "description": "The owner of a small organic food\nstore was concerned about her sales of a specialty yogurt\nmanufactured in Greece. As a result of increasing fuel\ncosts, she recently had to increase its price. To help boost\nsales, she decided to place the product on a different shelf\n(near eye level for most consumers) and in a location near\nother popular international products. She kept track of\nsales (number of containers sold per week) for six months\nafter she made the change.", + "download": "https://dasl.datadescription.com/download/data/3410", + "filename": "Product-placement", + "name": "Product placement", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Ballplayers have been signing ever larger contracts. The highest salaries (in millions of dollars per season) for each year since 1874 are in the data file. 
", - "download": "https://dasl.datadescription.com/download/data/3065", - "filename": "Baseball-salaries-2015", - "name": "Baseball salaries 2015", + "description": "A company is producing and marketing\nnew reading activities for elementary school children that\nit believes will improve reading comprehension scores. A\nresearcher randomly assigns third graders to an eight-week\nprogram in which some will use these activities and others\nwill experience traditional teaching methods. At the end of\nthe experiment, both groups take a reading comprehension\nexam. Do these results suggest that the new activities\nare better?", + "download": "https://dasl.datadescription.com/download/data/3411", + "filename": "Product-testing", + "name": "Product testing", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3066", - "filename": "Baseball-salaries-2016", - "name": "Baseball salaries 2016", + "description": "Productivity 2016", + "download": "https://dasl.datadescription.com/download/data/3409", + "filename": "Productivity-2016", + "name": "Productivity 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3067", - "filename": "Baseball-weights", - "name": "Baseball weights", + "description": "As a class project, students in a large Statistics class collected publicly available information on recent home sales in their hometowns. There are 894 properties. 
These are not a random sample, but they may be representative of home sales during a short period of time, nationwide. Among the variables available is an indication of whether the home was in an urban, suburban, or rural setting.", + "download": "https://dasl.datadescription.com/download/data/3423", + "filename": "Real-Estate", + "name": "Real Estate", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3069", - "filename": "Basketball-shots", - "name": "Basketball shots", + "description": "Real estate sample 1200", + "download": "https://dasl.datadescription.com/download/data/3423", + "filename": "Real-estate-sample-1200", + "name": "Real estate sample 1200", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A company that makes basketballs has the motto: “Our basketballs are ready to play.” Therefore, it is important to the company that the basketballs are inflated with the proper amount of air when shipped. Most basketballs are inflated to 7 to 9 pounds per square inch. 
Recently the company selected a random basketball from its […] ", - "download": "https://dasl.datadescription.com/download/data/3068", - "filename": "Basketballs", - "name": "Basketballs", + "description": "Regular gas 2017", + "download": "https://dasl.datadescription.com/download/data/3426", + "filename": "Regular-gas-2017", + "name": "Regular gas 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Belmont Stakes is the last and longest of the three horse races that make up the Triple Crown. Curiously, in some of the Belmont races horses have run clockwise around the track, and in others they have run counterclockwise. Do the horses care? But note that the length of the race has also not […] ", - "download": "https://dasl.datadescription.com/download/data/3072", - "filename": "Belmont-stakes-2015", - "name": "Belmont stakes 2015", + "description": "Retail trade index", + "download": "https://dasl.datadescription.com/download/data/3427", + "filename": "Retail-trade-index", + "name": "Retail trade index", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3151", - "filename": "Cyclists-2015", - "name": "Cyclists 2015", + "description": "A sample from Fortune 500 companies", + "download": "https://dasl.datadescription.com/download/data/3434", + "filename": "Sales-profits", + "name": "Sales and profits", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, 
{ "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3154", - "filename": "Darts", - "name": "Darts", + "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear. ", + "download": "https://dasl.datadescription.com/download/data/3437", + "filename": "Saratoga-house-prices", + "name": "Saratoga house prices", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In the National league all players take a turn at bat. But in the American league, a “designated hitter” usually bats for the pitcher, who is likely not to be a strong batter. The theory is that a designated hitter will lead to more hits, more runs, and a higher-scoring game. The data give the average runs per game and total home runs for major league baseball teams during the 2012 season. Is there a discernible difference between the leagues?", - "download": "https://dasl.datadescription.com/download/data/3159", - "filename": "Designated-hitter-2012", - "name": "Designated hitter 2012", + "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear. 
", + "download": "https://dasl.datadescription.com/download/data/3436", + "filename": "Saratoga-houses", + "name": "Saratoga houses", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Motorcycles designed to run off-road, often known as dirt bikes, are specialized\nvehicles. The dataset holds data on 114 many attributes of dirt bikes.\nSome cost as little as\n$1399, while others are substantially more expensive. One interest is in building a model to predict the price of a dirt bike from attributes of the bikes.", - "download": "https://dasl.datadescription.com/download/data/3166", - "filename": "Dirt-bikes", - "name": "Dirt bikes 2014", + "description": "A group of Statistics students cut ads out of magazines. They were careful to find two ads for each of 10 similar items, one with a sexual image and one without. They arranged the ads in random order and had 39 subjects look at them for one minute. Then they asked the subjects to list as many of the products as they could remember. Their data are shown in the table. Is there evidence that the sexual images mattered?", + "download": "https://dasl.datadescription.com/download/data/3444", + "filename": "Sex-sells", + "name": "Sex sells", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A leading manufacturer of exercise\nequipment wanted to collect data on the effectiveness of\ntheir equipment. 
An August 2001 article in the journal\nMedicine and Science in Sports and Exercise compared how\nlong it would take men and women to burn 200 calories\nduring light or heavy workouts on various kinds of exercise\nequipment. The results summarized in the table are the average\ntimes for a group of physically active young men and\nwomen whose performances were measured on a representative\nsample of exercise equipment.", - "download": "https://dasl.datadescription.com/download/data/3195", - "filename": "Exercise-equipment", - "name": "Exercise equipment", + "description": "Researchers studying how a car’s fuel efficiency (in Miles Per Gallon) varies with its Speed drove a compact car 200 miles at various speeds on a test track. Their data are shown in the table. ", + "download": "https://dasl.datadescription.com/download/data/3454", + "filename": "Slower-is-cheaper", + "name": "Slower is cheaper", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Football owners are constantly in competition for good players. The more wins, the more likely that the team will provide good business returns for the owners. The resources that each of the 32 teams has in the National Football League (NFL) vary, but the draft system is designed to counteract the advantages that wealthier teams may have.", - "download": "https://dasl.datadescription.com/download/data/3214", - "filename": "Football-salaries-2017", - "name": "Football salaries 2017", + "description": "The data give the federal rate on 3-month Treasury bills from 1950 to 1980 and Years Since 1950. 
", + "download": "https://dasl.datadescription.com/download/data/3477", + "filename": "TBill-rates-2016", + "name": "TBill rates 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A student performed an experiment with three different grips to see what effect it might have on the distance of a backhanded Frisbee throw. She tried it with her normal grip, with one finger out, and with the Frisbee inverted. She measured in paces how far her throws went. ", - "download": "https://dasl.datadescription.com/download/data/3221", - "filename": "Frisbee-throws", - "name": "Frisbee throws", + "description": "Tiffany was founded in 1837, when Charles Lewis Tiffany opened his first store in downtown Manhattan. Tiffany retails and distributes a selection of Tiffany & Co. brand jewelry at a range of prices. Today, more than 150 Tiffany & Co. stores sell to customers in U.S. and international markets.\nThe dataset holds quarterly sales data from 2005 through the middle of 2017. 
The data are suitable for time series modeling.", + "download": "https://dasl.datadescription.com/download/data/3482", + "filename": "Tiffany", + "name": "Tiffany 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Golf courses", - "download": "https://dasl.datadescription.com/download/data/3245", - "filename": "Golf-courses", - "name": "Golf courses", + "description": "Time on market", + "download": "https://dasl.datadescription.com/download/data/3483", + "filename": "Time-on-market", + "name": "Time on market", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The average drive distance (in yards) for 199 professional golfers during a week on the men’s PGA tour in 2015.", - "download": "https://dasl.datadescription.com/download/data/3246", - "filename": "Golf-drives-2015", - "name": "Golf drives 2015", + "description": "Are people who use tobacco products more likely to consume alcohol? Here are data on household spending (in pounds) taken by the British government on 11 regions in Great Britain. Do tobacco and alcohol spending appear to be related? What questions do you have about these data? What conclusions can you draw? 
", + "download": "https://dasl.datadescription.com/download/data/3485", + "filename": "Tobacco-and-alcohol", + "name": "Tobacco and alcohol", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Golfers 2017", - "download": "https://dasl.datadescription.com/download/data/3244", - "filename": "Golfers-2017", - "name": "Golfers 2017", + "description": "Daily closing stock prices for Toyota Motor Manufacturing from April 1, 2008, through June 21, 2013 ", + "download": "https://dasl.datadescription.com/download/data/3491", + "filename": "Toyota-stock-prices-2013", + "name": "Toyota stock prices 2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", - "download": "https://dasl.datadescription.com/download/data/3288", - "filename": "Indy-2016", - "name": "Indy 500 2016", + "description": "US Unemployment rate from 1/1/2003 to 8/1/17. 
", + "download": "https://dasl.datadescription.com/download/data/3507", + "filename": "Unemployment-2017", + "name": "Unemployment 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", - "download": "https://dasl.datadescription.com/download/data/3289", - "filename": "Indy-2017", - "name": "Indy 500 2017", + "description": "Kelly's Blue Book: https://www.kbb.com/cars-for-sale/ accessed on 31 Aug 2017 using zip code 94305 200 mile radius BMW M5", + "download": "https://dasl.datadescription.com/download/data/3508", + "filename": "Used-BMW", + "name": "Used BMW M5 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. 
When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", - "download": "https://dasl.datadescription.com/download/data/3290", - "filename": "Indy-2018", - "name": "Indy 500 2018", + "description": "How does the age of a used car influence its price? This is a small enough data set to find a model with a calculator. ", + "download": "https://dasl.datadescription.com/download/data/3509", + "filename": "Used-cars", + "name": "Used cars 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. (It has been run in May every year but\none—1901—when it took place on April 29.)", - "download": "https://dasl.datadescription.com/download/data/3305", - "filename": "Kentucky-Derby-2016", - "name": "Kentucky Derby 2016", + "description": "The web site www.autotrader.com lists cars for sale. On January 22 2017,\nit listed 55 used Honda Civics for sale by owner. From those listings, we extracted the asking price ($), the mileage, and the model year (from which we computed the age of the car at the time the data were collected).\nQuestions include how to best predict the price from mileage and age and whether any of the cars is a particularly good buy.\nOne car is a particularly old (1989) car that has relatively low mileage for such an old car. 
The seller claims it hasn’t been driven for several years. \nIt looks like Price might benefit from re-expression by logs.", + "download": "https://dasl.datadescription.com/download/data/3510", + "filename": "Used-Civics", + "name": "Used Civics 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. (It has been run in May every year but\none—1901—when it took place on April 29.)", - "download": "https://dasl.datadescription.com/download/data/3306", - "filename": "Kentucky-Derby-2017", - "name": "Kentucky Derby 2017", + "description": "The data give the gross domestic product (GDP) of the United States in trillions of 2009 dollars and time. ", + "download": "https://dasl.datadescription.com/download/data/3511", + "filename": "USGDP-2016", + "name": "USGDP 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. 
(It has been run in May every year but\none—1901—when it took place on April 29.)", - "download": "https://dasl.datadescription.com/download/data/3307", - "filename": "Kentucky-Derby-2018", - "name": "Kentucky Derby 2018", + "description": "Walmart revenue", + "download": "https://dasl.datadescription.com/download/data/3514", + "filename": "Walmart-revenue", + "name": "Walmart revenue", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "NY Marathon 2016", - "download": "https://dasl.datadescription.com/download/data/3370", - "filename": "NY-Marathon-2016", - "name": "NY Marathon 2016", + "description": "Gallup Poll of 1015 U.S. adults on April 9 – 12, 2015. Respondents were classified as high income (over $75,000), middle income ($30k–$75k), or low income (less than $30k). Those polled were asked for their views on redistributing U.S. wealth by heavily taxing the rich. Counts are reconstructed from percentages published by Gallup. ", + "download": "https://dasl.datadescription.com/download/data/3518", + "filename": "Wealth-Redistribution", + "name": "Wealth Redistribution", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "How are Olympic performances in various events related? The data gives winning long-jump and high-jump distances in meters, for the Summer Olympics from 1912 through 2016 ", - "download": "https://dasl.datadescription.com/download/data/3382", - "filename": "Olympic-jumps-2016", - "name": "Olympic jumps 2016", + "description": "Quarterly sales of Whole Foods Markets from 1995 through 2016. 
Whole Foods was purchased by Amazon in 2017, so 2016 is the final complete year prior to the merger. The data show a strong seasonal component even though food sales should not be seasonal. ", + "download": "https://dasl.datadescription.com/download/data/3522", + "filename": "Whole-Foods", + "name": "Whole Foods 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "NFL data from the 2015 football season reported the number of yards gained by each of the league’s 488 receivers ", - "download": "https://dasl.datadescription.com/download/data/3425", - "filename": "Receivers-2015", - "name": "Receivers 2015", + "description": "Wine production", + "download": "https://dasl.datadescription.com/download/data/3529", + "filename": "Wine-production", + "name": "Wine production", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Times (in minutes) for one runner to run 4 miles on various courses during a 10-year period. ", - "download": "https://dasl.datadescription.com/download/data/3433", - "filename": "Run-times", - "name": "Run times", + "description": "Quarterly median weekly earnings for U.S. women 25 years of age or older. Data are provided from the first quarter of 2003 through the first quarter of 2013. 
", + "download": "https://dasl.datadescription.com/download/data/3535", + "filename": "Women-earnings", + "name": "Women’s weekly earnings 2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Hill races are races that climb generally steep hills, held throughout Scotland throughout the year. The file holds records for men and women in these races the last time those were posted in an accessible table along with facts about the races. In particular, we know the length(km) and total climb(m). These are two independent predictors of the record times. Sex of the runner can be an additional indicator variable.", - "download": "https://dasl.datadescription.com/download/data/3440", - "filename": "Scottish-Hill-Races", - "name": "Scottish Hill Races", + "description": "Youth Unemployment 2016", + "download": "https://dasl.datadescription.com/download/data/3546", + "filename": "Youth-Unemployment-2016", + "name": "Youth Unemployment 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Economics" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A college hockey coach collected data from the 2016–2017 National Hockey League season. He hopes to convince his players that the number of shots taken has an effect on the number of goals scored. The data includes both offensive and defensive players. 
", - "download": "https://dasl.datadescription.com/download/data/3448", - "filename": "Shoot-to-Score-2016", - "name": "Shoot to Score 2016", + "description": "Alex Rodriguez (known to fans as A-Rod) was the youngest player ever to hit 500 home runs. The file holds the number of home runs hit by A-Rod during the 1994–2016 seasons. Describe the distribution, mentioning its shape and any unusual features. ", + "download": "https://dasl.datadescription.com/download/data/3038", + "filename": "a-rod-2016", + "name": "A-Rod 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Bjork Larsen was trying to decide whether to use a\nnew racing wax for cross-country skis. He decided that the\nwax would be worth the price if he could average less than\n55 seconds on a course he knew well, so he planned to study\nthe wax by racing on the course 8 times. The data report his race times. \n", - "download": "https://dasl.datadescription.com/download/data/3450", - "filename": "Ski-wax", - "name": "Ski wax", + "description": "In Olympic Archery both men and women start with a field of 64 qualifiers. Each archer shoots a round of 72 arrows (total possible score: 720) to establish a seeding position. Then they participate in a single-elimination contest. 
Thus, the seeding round is the only one that provides data for all archers (because some are […] ", + "download": "https://dasl.datadescription.com/download/data/3056", + "filename": "Archery", + "name": "Archery", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Men’s Giant Slalom skiing event consists of two runs whose times are added together for a final score. The data give the giant slalom times in the 2014 Winter Olympics at Sochi. ", - "download": "https://dasl.datadescription.com/download/data/3451", - "filename": "Slalom-times-2014", - "name": "Slalom times 2014", + "description": "American League baseball games are played under the designated hitter rule, meaning that pitchers, often weak hitters, do not come to bat. Baseball owners believe that the designated hitter rule means more runs scored, which in turn means higher attendance. Is there evidence that more fans attend games if the teams score more runs? The […] ", + "download": "https://dasl.datadescription.com/download/data/3057", + "filename": "Attendance-2016", + "name": "Attendance 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Men’s Giant Slalom skiing event consists of two runs whose times are added together for a final score. The data give the giant slalom times in the 2018 Winter Olympics at PyeongChang. 
", - "download": "https://dasl.datadescription.com/download/data/3452", - "filename": "Slalom-times-2018", - "name": "Slalom times 2018", + "description": "It has been suggested that children born in the summer have an advantage over their peers when it comes to sports, perhaps because they can be outdoors when they are young. The data report the number of professional ballplayers born in each month of the year for one season of professional baseball. ", + "download": "https://dasl.datadescription.com/download/data/3060", + "filename": "Ballplayer-births", + "name": "Ballplayer births", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Advertisements for an instructional video claim that the techniques will improve the ability of Little League pitchers to throw strikes and that, after undergoing the training, players will be able to throw strikes on at least 60% of their pitches. To test this claim, we have 20 Little Leaguers throw 50 pitches each, and we record the number of strikes. After the players participate in the training program, we repeat the test. The table shows the number of strikes each player threw before and after the training.", - "download": "https://dasl.datadescription.com/download/data/3464", - "filename": "Strikes", - "name": "Strikes", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3063", + "filename": "Baseball-attendance", + "name": "Baseball attendance", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Fifty nine countries won gold medals in the 2016 Summer Olympics. 
The dataset lists them, along with the total number of gold medals each won. It can be a challenge to find a good display for data like these. ", - "download": "https://dasl.datadescription.com/download/data/3468", - "filename": "Summer-Olympics-2016", - "name": "Summer Olympics 2016", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3064", + "filename": "Baseball-circumferences", + "name": "Baseball circumferences", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Super Bowl 2016", - "download": "https://dasl.datadescription.com/download/data/3470", - "filename": "Super-Bowl-2016", - "name": "Super Bowl 2016", + "description": "Ballplayers have been signing ever larger contracts. The highest salaries (in millions of dollars per season) for each year since 1874 are in the data file. 
", + "download": "https://dasl.datadescription.com/download/data/3065", + "filename": "Baseball-salaries-2015", + "name": "Baseball salaries 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Swim and Run", - "download": "https://dasl.datadescription.com/download/data/3577", - "filename": "Swim-Run", - "name": "Swim and Run", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3066", + "filename": "Baseball-salaries-2016", + "name": "Baseball salaries 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "People swim across Lake Ontario from Niagara on the Lake to Toronto–a distance of 52 km (32,3 miles). Because the lake is fresh water, this swim is considered more difficult than ocean swims of similar length because salt water provides more boyancy than fresh water. 
(For comparison, the English Channel is 21 miles across and, despite strong currents, generally takes less time to cross.)", - "download": "https://dasl.datadescription.com/download/data/3473", - "filename": "Swim-lake", - "name": "Swim the lake 2016", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3067", + "filename": "Baseball-weights", + "name": "Baseball weights", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Unlike track events, swimming heats are not determined at random. Instead, swimmers are seeded so that better swimmers are placed in later heats. Here are the times (in seconds) for the women’s 400-m freestyle for two heats in the 2016 Olympics. ", - "download": "https://dasl.datadescription.com/download/data/3471", - "filename": "Swimming-heats", - "name": "Swimming heats 2016", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3069", + "filename": "Basketball-shots", + "name": "Basketball shots", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Swimming heats London", - "download": "https://dasl.datadescription.com/download/data/3472", - "filename": "Swimming-heats-London", - "name": "Swimming heats London", + "description": "A company that makes basketballs has the motto: “Our basketballs are ready to play.” Therefore, it is important to the company that the basketballs are inflated with the proper amount of air when shipped. Most basketballs are inflated to 7 to 9 pounds per square inch. 
Recently the company selected a random basketball from its […] ", + "download": "https://dasl.datadescription.com/download/data/3068", + "filename": "Basketballs", + "name": "Basketballs", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Tour de France is the most famous bicycle race in the world. It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. Lance Armstrong’s 7 consecutive victories been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", - "download": "https://dasl.datadescription.com/download/data/3489", - "filename": "Tour-de-France-2016", - "name": "Tour de France 2016", + "description": "The Belmont Stakes is the last and longest of the three horse races that make up the Triple Crown. Curiously, in some of the Belmont races horses have run clockwise around the track, and in others they have run counterclockwise. Do the horses care? But note that the length of the race has also not […] ", + "download": "https://dasl.datadescription.com/download/data/3072", + "filename": "Belmont-stakes-2015", + "name": "Belmont stakes 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Tour de France is the most famous bicycle race in the world. It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. 
Lance Armstrong’s 7 consecutive victories been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", - "download": "https://dasl.datadescription.com/download/data/3490", - "filename": "Tour-de-France-2017", - "name": "Tour de France 2017", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3151", + "filename": "Cyclists-2015", + "name": "Cyclists 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Gallup poll asked 1008 Americans age 18 and over whether they planned to watch the upcoming Super Bowl. The pollster also asked those who planned to watch whether they were looking forward more to seeing the football game or the commercials. ", - "download": "https://dasl.datadescription.com/download/data/3516", - "filename": "Watch-Super-bowl", - "name": "Watch the Super bowl", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3154", + "filename": "Darts", + "name": "Darts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The world men’s weightlifting records are categorized by weight class of the competitors. How does the weight class relate to the record? ", - "download": "https://dasl.datadescription.com/download/data/3520", - "filename": "Weightlifting-2016", - "name": "Weightlifting 2016", + "description": "In the National league all players take a turn at bat. But in the American league, a “designated hitter” usually bats for the pitcher, who is likely not to be a strong batter. 
The theory is that a designated hitter will lead to more hits, more runs, and a higher-scoring game. The data give the average runs per game and total home runs for major league baseball teams during the 2012 season. Is there a discernible difference between the leagues?", + "download": "https://dasl.datadescription.com/download/data/3159", + "filename": "Designated-hitter-2012", + "name": "Designated hitter 2012", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Boston Marathon has had a wheelchair division since 1977.\nWho do you think\nis typically faster, the men’s marathon winner on foot\nor the women’s wheelchair marathon winner? Because\nthe conditions differ from year to year, and speeds have\nimproved over the years, it seems best to treat these as\npaired measurements. Here are summary statistics for\nthe pairwise differences in finishing time (in minutes):", - "download": "https://dasl.datadescription.com/download/data/3521", - "filename": "Wheelchair-Marathon", - "name": "Wheelchair Marathon 2016", + "description": "Motorcycles designed to run off-road, often known as dirt bikes, are specialized\nvehicles. The dataset holds data on many attributes of 114 dirt bikes.\nSome cost as little as\n$1399, while others are substantially more expensive. 
One interest is in building a model to predict the price of a dirt bike from attributes of the bikes.", + "download": "https://dasl.datadescription.com/download/data/3166", + "filename": "Dirt-bikes", + "name": "Dirt bikes 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Sears Cup was established in 1993\nto honor institutions that maintain a broad-based athletic\nprogram, achieving success in many sports, both men’s and\nwomen’s. In the years following its Division III inception in\n1995, the cup was won by Williams College 15 of 17 years.\nWhy did the football team win so much? Was it because\nthey were heavier than their opponents? The data gives the\naverage team weights for selected years from 1973 to 1993.", - "download": "https://dasl.datadescription.com/download/data/3525", - "filename": "Williams-football", - "name": "Williams football", + "description": "A leading manufacturer of exercise\nequipment wanted to collect data on the effectiveness of\ntheir equipment. An August 2001 article in the journal\nMedicine and Science in Sports and Exercise compared how\nlong it would take men and women to burn 200 calories\nduring light or heavy workouts on various kinds of exercise\nequipment. 
The results summarized in the table are the average\ntimes for a group of physically active young men and\nwomen whose performances were measured on a representative\nsample of exercise equipment.", + "download": "https://dasl.datadescription.com/download/data/3195", + "filename": "Exercise-equipment", + "name": "Exercise equipment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The times from the first race of the women’s 2 X 500-m speed skating times at the 2010 Winter Olympics in Vancouver, B.C. are given. ", - "download": "https://dasl.datadescription.com/download/data/3530", - "filename": "speed-skating", - "name": "Winter Olympics 2010 speed skating", + "description": "Football owners are constantly in competition for good players. The more wins, the more likely that the team will provide good business returns for the owners. 
The resources that each of the 32 teams has in the National Football League (NFL) vary, but the draft system is designed to counteract the advantages that wealthier teams may have.", + "download": "https://dasl.datadescription.com/download/data/3214", + "filename": "Football-salaries-2017", + "name": "Football salaries 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Source: https://www.olympic.org/sochi-2014/alpine-skiing/slalom-men", - "download": "https://dasl.datadescription.com/download/data/3531", - "filename": "Winter-Olympics-2014", - "name": "Winter Olympics 2014", + "description": "A student performed an experiment with three different grips to see what effect it might have on the distance of a backhanded Frisbee throw. She tried it with her normal grip, with one finger out, and with the Frisbee inverted. She measured in paces how far her throws went. ", + "download": "https://dasl.datadescription.com/download/data/3221", + "filename": "Frisbee-throws", + "name": "Frisbee throws", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "53 men completed the men’s alpine downhill. The gold medal winner finished in 100.25 seconds. Here are the times (in seconds) for all competitors. 
", - "download": "https://dasl.datadescription.com/download/data/3532", - "filename": "olympics-downhill", - "name": "Winter olympics 2018 downhill", + "description": "Golf courses", + "download": "https://dasl.datadescription.com/download/data/3245", + "filename": "Golf-courses", + "name": "Golf courses", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "he women’s heptathlon in the Olympics consists of seven track-and-field events: the 200 m and 800 m runs, 100 m high hurdles, shot put, javelin, high jump, and long jump. Each contestant is awarded points for each event based on her performance. So, which performance deserves more points? It’s not clear how to compare them. They aren’t measured in the same units, or even in the same direction (longer jumps are better but shorter times are better.)", - "download": "https://dasl.datadescription.com/download/data/3536", - "filename": "Womens-Heptathlon", - "name": "Womens Heptathlon 2016", + "description": "The average drive distance (in yards) for 199 professional golfers during a week on the men’s PGA tour in 2015.", + "download": "https://dasl.datadescription.com/download/data/3246", + "filename": "Golf-drives-2015", + "name": "Golf drives 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Women’s 500 metres in short track speed skating at the 2018 Winter Olympics took place from 10 to 13 February 2018 at the Gangneung Ice Arena in Gangneung, South Korea.The defending champion from 2014, Li Jianrou, had retired, but the 2014 silver medalist Arianna Fontana competed and 
eventually won the event. ", - "download": "https://dasl.datadescription.com/download/data/3537", - "filename": "Womens-short-track", - "name": "Womens short track 2018", + "description": "Golfers 2017", + "download": "https://dasl.datadescription.com/download/data/3244", + "filename": "Golfers-2017", + "name": "Golfers 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The women’s 1500 metres speed skating competition for the 2006 Winter Olympics was held in Turin, Italy, on 22 February ", - "download": "https://dasl.datadescription.com/download/data/3538", - "filename": "Womens-speed-skating", - "name": "Womens speed skating 2006", + "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. 
In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", + "download": "https://dasl.datadescription.com/download/data/3288", + "filename": "Indy-2016", + "name": "Indy 500 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "http://jse.amstat.org/datasets/ballbearings.txt", - "download": "http://jse.amstat.org/datasets/ballbearings.dat.txt", - "filename": "ballbearings", - "name": "Ball Bearing Reliability Data", + "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. 
In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", + "download": "https://dasl.datadescription.com/download/data/3289", + "filename": "Indy-2017", + "name": "Indy 500 2017", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "http://jse.amstat.org/datasets/baseball.txt", - "download": "http://jse.amstat.org/datasets/baseball.dat.txt", - "filename": "baseball_", - "name": "baseball", + "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", + "download": "https://dasl.datadescription.com/download/data/3290", + "filename": "Indy-2018", + "name": "Indy 500 2018", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This data set contains every NCAA Basketball Tournament game \never played. 
The tournament has been held every year since 1939.", - "download": "http://jse.amstat.org/datasets/basketball.dat.txt", - "filename": "Basketball", - "name": "NCAA Basketball Tournament Data", + "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. (It has been run in May every year but\none—1901—when it took place on April 29.)", + "download": "https://dasl.datadescription.com/download/data/3305", + "filename": "Kentucky-Derby-2016", + "name": "Kentucky Derby 2016", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Data are provided for Barry Bonds' plate appearances in the 2001\nbaseball season. Variables include characteristics of the innings\nbefore the first pitch to Bonds (e.g., the number of outs, the number\nof runners on each base, the score, the opposing pitcher's earned run\naverage) and after the first pitch to Bonds (e.g., the outcome of the\nappearance, how many runs scored in the inning after Bonds hits).", - "download": "http://jse.amstat.org/datasets/bonds2001.dat.txt", - "filename": "Bonds", - "name": "Barry Bonds' 2001 Plate Appearances", + "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. 
(It has been run in May every year but\none—1901—when it took place on April 29.)", + "download": "https://dasl.datadescription.com/download/data/3306", + "filename": "Kentucky-Derby-2017", + "name": "Kentucky Derby 2017", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The dataset contains the scores, opponents, and sites of the 18 Big Ten\nmen's basketball games that involved the University of Iowa in 1997.", - "download": "http://jse.amstat.org/datasets/hawks.dat.txt", - "filename": "hawks", - "name": " 1997 University of Iowa Big Ten Basketball Data", + "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. (It has been run in May every year but\none—1901—when it took place on April 29.)", + "download": "https://dasl.datadescription.com/download/data/3307", + "filename": "Kentucky-Derby-2018", + "name": "Kentucky Derby 2018", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The dataset consists of game-by-game information for the 1998 season\nfor Mark McGwire and the St. Louis Cardinals, and Sammy Sosa and the\nChicago Cubs. 
The dataset includes information on the home run hitting\nof these two players, as well as game results for the teams.", - "download": "http://jse.amstat.org/datasets/homerun.dat.txt", - "filename": "homerun", - "name": "The 1998 Home Run Race Between Mark McGwire and Sammy Sosa", + "description": "NY Marathon 2016", + "download": "https://dasl.datadescription.com/download/data/3370", + "filename": "NY-Marathon-2016", + "name": "NY Marathon 2016", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Data are from The Baseball Encyclopedia (1993) and Total Baseball (2001). \nThey include the location, league affiliation (National or American), \ndivision affiliation (East, Central, or West), season of play, home game \nattendance, runs scored, runs allowed, wins, losses, and number of games \nbehind the division leader for each major league franchise for the 1969 \nthrough 2000 seasons. Other data (including opening dates for new stadia, \nand dates of work stoppages) were collected from Ballparks by Munsey and \nSuppes (2001) and InfoPlease (2001).", - "download": "http://jse.amstat.org/datasets/MLBattend.dat.txt", - "filename": "MLBattend", - "name": "1969-2000 Major League Baseball Attendance data", + "description": "How are Olympic performances in various events related? 
The data gives winning long-jump and high-jump distances in meters, for the Summer Olympics from 1912 through 2016 ", + "download": "https://dasl.datadescription.com/download/data/3382", + "filename": "Olympic-jumps-2016", + "name": "Olympic jumps 2016", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Driver results for all NASCAR races between 1975 1nd 2003, inclusive. The dataset constitutes all \nparticipants in each of 898 races, and includes their start/finish postions, prize winnings, car \nmake and laps completed.", - "download": "http://jse.amstat.org/datasets/nascard.dat.txt", - "filename": "nascard", - "name": "NASCAR Driver Results", + "description": "NFL data from the 2015 football season reported the number of yards gained by each of the league’s 488 receivers ", + "download": "https://dasl.datadescription.com/download/data/3425", + "filename": "Receivers-2015", + "name": "Receivers 2015", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Race results for all NASCAR Winston Cup races between 1975 and 2003, inclusive. 
The dataset \nincludes the numbers of cars, total prize winnings, monthly consumer price index for the month \nof the race, track length, laps completed by the winner, spatial co-ordinates and name of track.", - "download": "http://jse.amstat.org/datasets/nascarr.dat.txt", - "filename": "nascarr", - "name": "NASCAR Race Results", + "description": "Times (in minutes) for one runner to run 4 miles on various courses during a 10-year period. ", + "download": "https://dasl.datadescription.com/download/data/3433", + "filename": "Run-times", + "name": "Run times", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This data set contains performance statistics for National \nFootball League (NFL) teams for their 2000 regular season.\n Columns Variable Description\n 1 - 3 initials team initials\n 5 - 26 team name and location of the team\n 28 - 29 wins wins\n 31 - 32 losses losses\n 34 - 35 homedrives50 drives begun in opponents' territory\n 37 - 38 homedrives20 drives begun within 20 yards of the goal\n 40 - 41 oppdrives50 opponents drives begun in team's territory\n 43 oppdrives20 opponents drives begun within 20 yards of goal\n 45 homepuntblock punts blocked by team\n 47 opppuntblock punts team had blocked\n 49 - 50 hometouch touchdowns scored by team\n 52 - 53 opptouch touchdowns scored against team\n 55 - 58 homeyards total yardage gained by offense\n 60 - 63 oppyards total yardage allowed by defense\n 65 - 68 hometop time of possession by offense (in minutes)\n 70 - 73 opptop time of possession by opponents' offense\n 75 - 76 homefgm field goals made\n 78 - 79 oppfgm field goals allowed to opponents\n 81 - 82 homefga field goals attempted\n 84 - 85 oppfga field goals attempted by 
opponents\n 87 - 89 opppuntno punts made by opponents\n 91 - 94 opppuntave average length of punts made by opponents\n 96 - 99 opppuntnet average change in field position \n during opponents' punts\n101 - 102 opppunttb opponents' punts taken for touchbacks\n104 - 105 opppunt20 opponents' punts that resulted in the team's\n offense beginning within 20 yards of their \n own (defensive) goal line\n107 - 108 opppuntlong longest opponents' punt\n110 - 112 homepuntno punts made by team\n114 - 117 homepuntave average length of punts made by team\n119 - 122 homepuntnet average change in field position \n during team's punts\n124 - 125 homepunttb team's punts taken for touchbacks\n127 - 128 homepunt20 team's punts that resulted in the opponents'\n offense beginning within 20 yards of their \n own (defensive) goal line\n130 - 131 homepuntlong longest team punt\n133 - 135 home1sts first downs obtained by offense\n137 - 139 opp1sts first downs allowed by defense\n141 - 142 homesacks sacks achieved by team's defense\n144 - 145 oppsacks sacks allowed by team's offense\n147 - 148 homekos kickoffs made by team\n150 - 151 oppkos kickoffs received by team\n153 - 156 homekoyds yards gained during kickoff returns\n158 - 161 oppkoyds yards allowed to opposition during kickoff returns\n163 - 166 homekoave average yards gained during kickoff returns\n168 - 171 oppkoave average yards allowed during kickoff returns\n173 - 175 homekolong longest kickoff return made by team\n177 - 179 oppkolong longest kickoff return allowed by team\n181 homekotds kickoffs returned for a touchdown by team\n183 oppkotds kickoffs returned for touchdown by opposition\n185 - 186 homerets punts returned by team\n188 - 189 opprets punts returned by opposition\n191 - 192 homefc punts \"fair caught\" by team\n194 - 195 oppfc punts \"fair caught\" by opposition\n197 - 199 homeretyds return yardage on punts by team\n201 - 203 oppretyds return yardage on punts by opposition\n205 - 208 homeretave average length of punt 
returns by team\n210 - 213 oppretave average length of punt returns by opposition\n215 homerettds punts returned by team for a touchdown\n217 opprettds punts returned by opponents for a touchdown\n219 - 220 homeint interceptions made by team's defense\n222 - 223 oppint interceptions made against team's offense\n225 - 226 homerecover fumbles recovered by team's defense\n228 - 229 opprecover fumbles recovered by opposing defenses\n231 - 232 numgames games played by team\n234 - 237 opprateyds average number of yards gained \n per minute of possession by opponents\n239 - 242 homerateyds average number of yards gained \n per minute of possession by team\n244 - 247 opppuntrate average number of punts \n per minute of possession by opponents\n249 - 252 homepuntrate average number of punts \n per minute of possession by team\n254 - 258 oppratetd average number of touchdowns \n per minute of possession by opponents\n260 - 264 homeratetd average number of touchdowns \n per minute of possession by team\n266 - 269 winpercent winning percentage\n271 - 275 hometorate turnovers obtained by team,\n per minute of possession by opponents\n277 - 281 opptorate turnovers allowed by team, \n per minute of possession\n283 - 286 home1rate first downs obtained by team, \n per minute of possession\n288 - 291 opp1rate first downs allowed by team's defense, \n per minute of possession by opposition\n293 - 295 homepoints points scored by team\n297 - 299 opppoints points scored against team\n301 - 303 conference conference to which the team belongs (AFC or NFC)", - "download": "http://jse.amstat.org/datasets/nfl2000.dat.txt", - "filename": "nfl2000", - "name": "NFL Y2K PCA", + "description": "Hill races are races that climb generally steep hills, held throughout Scotland throughout the year. The file holds records for men and women in these races the last time those were posted in an accessible table along with facts about the races. In particular, we know the length(km) and total climb(m). 
These are two independent predictors of the record times. Sex of the runner can be an additional indicator variable.", + "download": "https://dasl.datadescription.com/download/data/3440", + "filename": "Scottish-Hill-Races", + "name": "Scottish Hill Races", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", - "download": "http://jse.amstat.org/datasets/nfl93.dat.txt", - "filename": "nfl93", - "name": "NFL Scores and Pointspreads", + "description": "A college hockey coach collected data from the 2016–2017 National Hockey League season. He hopes to convince his players that the number of shots taken has an effect on the number of goals scored. The data includes both offensive and defensive players. ", + "download": "https://dasl.datadescription.com/download/data/3448", + "filename": "Shoot-to-Score-2016", + "name": "Shoot to Score 2016", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. 
In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", - "download": "http://jse.amstat.org/datasets/nfl94.dat.txt", - "filename": "nfl94", - "name": "NFL Scores and Pointspreads", + "description": "Bjork Larsen was trying to decide whether to use a\nnew racing wax for cross-country skis. He decided that the\nwax would be worth the price if he could average less than\n55 seconds on a course he knew well, so he planned to study\nthe wax by racing on the course 8 times. The data report his race times. \n", + "download": "https://dasl.datadescription.com/download/data/3450", + "filename": "Ski-wax", + "name": "Ski wax", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", - "download": "http://jse.amstat.org/datasets/nfl95.dat.txt", - "filename": "nfl95", - "name": "NFL Scores and Pointspreads", + "description": "The Men’s Giant Slalom skiing event consists of two runs whose times are added together for a final score. The data give the giant slalom times in the 2014 Winter Olympics at Sochi. 
", + "download": "https://dasl.datadescription.com/download/data/3451", + "filename": "Slalom-times-2014", + "name": "Slalom times 2014", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", - "download": "http://jse.amstat.org/datasets/nfl96.dat.txt", - "filename": "nfl96", - "name": " NFL Scores and Pointspreads", + "description": "The Men’s Giant Slalom skiing event consists of two runs whose times are added together for a final score. The data give the giant slalom times in the 2018 Winter Olympics at PyeongChang. ", + "download": "https://dasl.datadescription.com/download/data/3452", + "filename": "Slalom-times-2018", + "name": "Slalom times 2018", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The dataset contains scores for all regular season National Football\nLeague games from the 1998, 1999 and 2000 seasons. 
In addition to \nthe points scored by the home and visiting teams in each game, the\ndataset contains a pointspread that handicaps each game.\n\nColumns \n 1 - 4 Year (1998, 1999, or 2000)\n 6 - 7 Week of the season (1 to 17)\n10 - 27 Home team name\n29 - 30 Home team score\n33 - 50 Visiting team name\n52 - 53 Visiting team score\n56 - 60 Pointspread ", - "download": "http://jse.amstat.org/datasets/nfl98-00.dat.txt", - "filename": "nfl98-00", - "name": " NFL Scores for 1998-2000", + "description": "Advertisements for an instructional video claim that the techniques will improve the ability of Little League pitchers to throw strikes and that, after undergoing the training, players will be able to throw strikes on at least 60% of their pitches. To test this claim, we have 20 Little Leaguers throw 50 pitches each, and we record the number of strikes. After the players participate in the training program, we repeat the test. The table shows the number of strikes each player threw before and after the training.", + "download": "https://dasl.datadescription.com/download/data/3464", + "filename": "Strikes", + "name": "Strikes", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": " The data set provides the weights (in lbs)\nof the 26 men on the 1996 US Olympic Rowing Team in Atlanta. The\ndata includes the names of the participants and which event they\nrowed in. 
The US team participated in 7 of the 8 possible events.\nThis data set is useful for discussing outliers,\nexplanations for outliers, and comparing the robustness of the\nmean and the median.\n\n", - "download": "http://jse.amstat.org/datasets/rowing.dat.txt", - "filename": "rowing", - "name": " Weights of 1996 US Olympic Rowing Team", + "description": "Fifty nine countries won gold medals in the 2016 Summer Olympics. The dataset lists them, along with the total number of gold medals each won. It can be a challenge to find a good display for data like these. ", + "download": "https://dasl.datadescription.com/download/data/3468", + "filename": "Summer-Olympics-2016", + "name": "Summer Olympics 2016", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Data consist of 500-yard freestyle swim times for male and female swimmers age 50-94 in a biennial national competition. Variables include year, gender, age, age group, swim time, seed time (qualifying time from state competition), and split times (in each 50-yard segment).", - "download": "http://jse.amstat.org/v22n1/doane/SeniorSwimTimes-DataSet.txt", - "filename": "SeniorSwimTimes", - "name": "SeniorSwimTimes", + "description": "Super Bowl 2016", + "download": "https://dasl.datadescription.com/download/data/3470", + "filename": "Super-Bowl-2016", + "name": "Super Bowl 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Data are provided for the 56 Tour De France bicycle races since World \nWar II. 
The year and dates of the event, the total number of stages, \nthe total distance, the winning total time and average speed, the name \nand country of the winner, the birth date of the winner, and the \nwinner's age at the time of victory are the variables in the dataset.", - "download": "http://jse.amstat.org/datasets/tdf.dat.txt", - "filename": "tdf", - "name": "Tour De France Winners (Can Lance Win Six?)", + "description": "Swim and Run", + "download": "https://dasl.datadescription.com/download/data/3577", + "filename": "Swim-Run", + "name": "Swim and Run", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - } - ], - "subcategory_name": "Sport" - }, - { - "datasets": [ + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "After purchasing a batch of flight helmets that did not fit the heads of many pilots, the NZ Airforce decided to mesure the headsizes of all recruits. Before this was carried out, information was collected to determine the feasibility of using cheap cardboard callipers to make the measurements, instead of metal ones which were expensive and uncomfortable. The data lists the head diameters of 18 recruits measured once using cardboard callipers and again using metal callipers. One question is whether there is any systematic difference between the two sets of callipers. One might also ask whether there is more variability in the cardboard callipers measurement than that of the metal callipers. ", - "download": "http://www.statsci.org/data/oz/nzhelmet.txt", - "filename": "nzhelmet", - "name": "Helmet Sizes for New Zealand Airforce", + "description": "People swim across Lake Ontario from Niagara on the Lake to Toronto–a distance of 52 km (32.3 miles). 
Because the lake is fresh water, this swim is considered more difficult than ocean swims of similar length because salt water provides more buoyancy than fresh water. (For comparison, the English Channel is 21 miles across and, despite strong currents, generally takes less time to cross.)", + "download": "https://dasl.datadescription.com/download/data/3473", + "filename": "Swim-lake", + "name": "Swim the lake 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "These data were collected as part of a project for the Federal Office for Road Safety conducted by the Research Institute of Gender and Health at the University of Newcastle. There is evidence that women drivers who are involved in motor vehicle accidents are more likely than men to be injured. A possible reason is that women often drive smaller cars that provide less protection in a collision. One of the aims of the project was to examine preferences for cars among men and women and investigate the extent to which safety was a factor in determining preferences. \nThe survey was conducted by research assistants who asked people in car parks to participate and administered a structured questionnaire. They were instructed to obtain data from men and women with small, medium and large cars, with 50 people per group for a total of 300 respondents. (The sample size was based on power requirements for another part of the survey that involved anthropometric measurements.) The research assistants approached people in car parks of the University of Newcastle and nearby shopping centres during December 1997 and January 1998. \nThe data consist of 300 records each with 22 variables. 
The variables are: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nID\n\nIdentification number of respondent\n\nAge\n\nAge of respondent (years)\n\nSex\n\n1=female, 2=male\n\nLicYr\n\nTime they have held a full driving licence, in years and months (years)\n\nLicMth\n\nTime they have held a full driving licence, in years and months (months)\n\nActCar\n\nMake, model and year of car most often driven, coded to size of car 1=small, 2=medium, 3=large\n\nKids5\n\nChildren under five, 1=yes, 2=no\n\nKids6\n\nChildren 6 to 16, 1=yes, 2=no\n\nPrefCar\n\nPreferred car, coded to size of car 1=small, 2=medium, 3=large\n\nCar15k\n\nPreferred type of car if cost $15000, 1=small new car; 2=large second-hand car\n\nReason\n\n1=safety, 2=reliability, 3=cost, 4=performance, 5=comfort, 6=looks\n\nCost\n\nHow important is cost when buying a car? 1=not important, 2=little importance, 3=important, 4=very important\n\nReliable\n\nHow important is reliability ...?\n\nPerform\n\nHow important is performance ...?\n\nFuel\n\nHow important is fuel consumption ...?\n\nSafety\n\nHow important is safety ...?\n\nAC/PS\n\nHow important is air conditioning/power steering ...?\n\nPark\n\nHow important is ease of parking ...?\n \nRoom\n \nHow important is space/roominess ...?\n \nDoors\n \nHow important is the number of doors ...?\n \nPrestige\n \nHow important is prestige/style ...?\n \nColour\n \nHow important is colour ...?\n", - "download": "http://www.statsci.org/data/oz/carprefs.txt", - "filename": "carprefs", - "name": "Car Preferences", + "description": "Unlike track events, swimming heats are not determined at random. Instead, swimmers are seeded so that better swimmers are placed in later heats. Here are the times (in seconds) for the women’s 400-m freestyle for two heats in the 2016 Olympics. 
", + "download": "https://dasl.datadescription.com/download/data/3471", + "filename": "Swimming-heats", + "name": "Swimming heats 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Do you use up the same amount of the soap in the shower each morning, or does it depend on the size of the bar of soap? This data was collected by Rex Boggs of Glenmore State High School in Rockhampton, Queensland. Rex writes: \nI had a hypothesis that the daily weight of my bar of soap in my shower wasn't a linear function, the reason being that the tiny little bar of soap at the end of its life seemed to hang around for just about ever. I wanted to throw it out, but I felt I shouldn't do so until it became unusable. And that seemed to take weeks. \nAlso I had recently bought some digital kitchen scales and felt I needed to use them to justify the cost. I hypothesised that the daily weight of a bar of soap might be dependent upon surface area, and hence would be a quadratic function. \nI kept records for three weeks (the life of the bar), and was amazed to find that the data was linear with a very high R2 value, until the last few days of its life. \nThe data ends at day 22. On day 23 the soap broke into two pieces and one piece went down the plughole ... 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDate \n\nDate of observation\n\nDay \n\nNumber of days since beginning of experiment\n\nWeight \n\nWeight of soap bar (grams)\n\n\n\n", - "download": "http://www.statsci.org/data/oz/soap.txt", - "filename": "soap", - "name": "Bar of Soap", + "description": "Swimming heats London", + "download": "https://dasl.datadescription.com/download/data/3472", + "filename": "Swimming-heats-London", + "name": "Swimming heats London", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "These data are for specimens of 50 varieties of timber, for modulus of rigidity, modulus of elasticity and air dried density, arranged in increasing order of magnitude of the density. ", - "download": "http://www.statsci.org/data/oz/timber.txt", - "filename": "timber", - "name": "Timber Data", + "description": "The Tour de France is the most famous bicycle race in the world. It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. Lance Armstrong’s 7 consecutive victories have been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", + "download": "https://dasl.datadescription.com/download/data/3489", + "filename": "Tour-de-France-2016", + "name": "Tour de France 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A soft drink bottler is analyzing vending machine service routes in his distribution system. 
He is interested in predicting the amount of time required by the route driver to service the vending machines in an outlet. This service activity including stocking the machine with beverage products and minor maintenance or housekeeping. The industrial engineer responsible for the study has suggested that the two most important variables affecting the delivery time are the number of cases of product stocked and the distance walked by the route driver. The engineer has collected 25 observations on delivery time (minutes), number of cases and distance walked (feet).", - "download": "http://www.statsci.org/data/general/softdrin.txt", - "filename": "softdrin", - "name": "Soft Drink Delivery Times", + "description": "The Tour de France is the most famous bicycle race in the world. It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. Lance Armstrong’s 7 consecutive victories have been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", + "download": "https://dasl.datadescription.com/download/data/3490", + "filename": "Tour-de-France-2017", + "name": "Tour de France 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Measurements of weight and tar, nicotine, and carbon monoxide content\nare given for 25 brands of domestic cigarettes.\n\nThe Federal Trade Commission annually rates varieties of domestic\ncigarettes according to their tar, nicotine, and carbon monoxide\ncontent. The United States Surgeon General considers each of these\nsubstances hazardous to a smoker's health. 
Past studies have shown\nthat increases in the tar and nicotine content of a cigarette are\naccompanied by an increase in the carbon monoxide emitted from the\ncigarette smoke.\n\nThe data presented here are taken from Mendenhall and Sincich (1992)\nand are a subset of the data produced by the Federal Trade Commission.\n\nFor more information, see the article \"Using Cigarette Data for an\nIntroduction to Multiple Regression\" by Lauren McIntyre in Volume 2,\nNumber 1, of the _Journal of Statistics Education_. ", - "download": "http://jse.amstat.org/datasets/cigarettes.dat.txt", - "filename": "cigarettes", - "name": "Cigarette data for an introduction to multiple regression", + "description": "The Gallup poll asked 1008 Americans age 18 and over whether they planned to watch the upcoming Super Bowl. The pollster also asked those who planned to watch whether they were looking forward more to seeing the football game or the commercials. ", + "download": "https://dasl.datadescription.com/download/data/3516", + "filename": "Watch-Super-bowl", + "name": "Watch the Super bowl", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Impact strength of insulation cuts in foot-pounds. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nLot\n\nLot of insulating material\n\nCut\n\nLengthwise (Length) or crosswise (Cross)\n\nStrength\n\nImpact strength in foot-pounds\n\n\n\n", - "download": "http://www.statsci.org/data/general/insulate.txt", - "filename": "insulate", - "name": "Impact Strength Of Insulation Cuts", + "description": "The world men’s weightlifting records are categorized by weight class of the competitors. How does the weight class relate to the record? 
", + "download": "https://dasl.datadescription.com/download/data/3520", + "filename": "Weightlifting-2016", + "name": "Weightlifting 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data was collected by Stewart Fischer and David Tippetts, statistics students at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. Here is their description of the data and its collection: \nThe experiment decided upon was to see if by using two different designs of paper aeroplane, how far the plane would travel. In considering this, the question arose, whether different types of paper and different angles of release would have any effect on the distance travelled. Knowing that paper aeroplanes are greatly influenced by wind, we had to find a way to eliminate this factor. We decided to perform the experiment in a hallway of the University, where the effects of wind can be controlled to some extent by closing doors. \nIn order to make the experimental units as homogeneous as possible we allocated one person to a task, so person 1 folded and threw all planes, person 2 calculated the random order assignment, measured all the distances, checked that the angles of flight were right, and checked that the plane release was the same each time. \nThe factors that we considered each had two levels as follows: \nPaper: A4 size, 80gms and 50gms\nDesign: High Performance Dual Glider, and Incredibly Simple Glider (patterns attached to original report)\nAngle of release: Horizontal, or 45 degrees upward. \nThe random order assignment was calculated using the random number function of a calculator. 
Each combination of factors was assigned a number from one to eight, the random numbers were generated and accordingly the order of the experiment was found. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in mm\n\nPaper\n\n80gms = 1, 50gms = 2\n\nAngle\n\nHorizontal = 1, 45 degrees = 2\n\nDesign\n\nHigh-performance = 1, Incredibly simple = 2\n\nOrder\n\nOrder in which the runs were conducted\n", - "download": "http://www.statsci.org/data/oz/planes.txt", - "filename": "planes", - "name": "Paper Plane Experiment", + "description": "The Boston Marathon has had a wheelchair division since 1977.\nWho do you think\nis typically faster, the men’s marathon winner on foot\nor the women’s wheelchair marathon winner? Because\nthe conditions differ from year to year, and speeds have\nimproved over the years, it seems best to treat these as\npaired measurements. Here are summary statistics for\nthe pairwise differences in finishing time (in minutes):", + "download": "https://dasl.datadescription.com/download/data/3521", + "filename": "Wheelchair-Marathon", + "name": "Wheelchair Marathon 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "\"Discovery Day\" is a day set aside by the United States Naval Postgraduate School in Monterey, California, to invite the general public into its laboratories. On Discovery Day, 21 October 1995, data on reaction time and hand-eye coordination were collected on 118 members of the public who visited the Human Systems Integration Laboratory. The age and sex of each subject were also recorded. Visitors were mostly in family groups. \nOne experiment which demonstrates motor learning and hand-eye coordination, is rotary pursuit tracking. The equipment used has a rotating disk with a 3/4\" target spot. 
The subject’s task is to maintain contact with the target spot with a metal wand. Trials were conducted for 15 seconds at a time, and the total contact time during the 15 seconds was recorded. Four trials were recorded for each of 108 subjects. \nThe target spot on the Circle tracker keeps constant speed in a circular path. The target spot on the Box tracker has varying speeds as it traverses the box, making the task potentially more difficult. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSex\n\nMale (M) or female (F)\n\nAge\n\nAge of subject in years\n\nShape\n\nBox or Circle\n\nTrial1\n\nContact time for 1st trial\n\nTrial2\n\nContact time for 2nd trial\n\nTrial3\n\nContact time for 3rd trial\n\nTrial4\n\nContact time for 4th trial\n\n\n\n", - "download": "http://www.statsci.org/data/general/tracking.txt", - "filename": "tracking", - "name": "Rotary Pursuit Tracking", + "description": "The Sears Cup was established in 1993\nto honor institutions that maintain a broad-based athletic\nprogram, achieving success in many sports, both men’s and\nwomen’s. In the years following its Division III inception in\n1995, the cup was won by Williams College 15 of 17 years.\nWhy did the football team win so much? Was it because\nthey were heavier than their opponents? The data gives the\naverage team weights for selected years from 1973 to 1993.", + "download": "https://dasl.datadescription.com/download/data/3525", + "filename": "Williams-football", + "name": "Williams football", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Experiment conducted by Bill Afantenou, second year statistics student at QUT. Here is his description of the experiment: \n``As I am a big pizza lover, I had much pleasure in involving pizza in my experiment. 
I became curious to find out the time it took for a pizza to be delivered to the front door of my house. I was interested to see how, by varying whether I ordered thick or thin crust, whether Coke was ordered with the pizza and whether garlic bread was ordered with the pizza, the response would be affected. \n``Because of my current financial status and limitation of time, I decided to have only two replicates, just to get a reasonable estimate of the variance. To decrease my financial burden I managed a deal with the manager of the pizza shop. I managed to get the pickup special, delivered to my house, which was the cheapest and smallest pizza made. I tried to repeat the experiment in as nearly as possible identical conditions to reduce `noise'. \n``I ordered the pizza from the same shop, being Domino's Pizza. To be consistent I ordered a Supreme pizza each time at approximately the same time of day. The response was measured from the time I closed the telephone to the time the pizza was delivered to the front door of my house. \n``I wrote each of the eight treatments on a piece of paper twice, put them all into a hat, mixed them up, and took them out one at a time to allocate the order in which each treatment was done. \n``As well as the response and treatment for each pizza delivery the actual hour of delivery was recorded, also the order in which the treatments were done and whether the driver was male or female.'' \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nCrust\n\nThin=0, Thick=1\n\nCoke\n\nNo=0, Yes=1\n\nBread\n\nGarlic bread. No=0, Yes=1\n\nDriver\n\nMale=M, Female=F\n\nHour\n\nTime of order in hours since midnight\n\nDelivery\n\nDelivery time in minutes\n", - "download": "http://www.statsci.org/data/oz/pizza.txt", - "filename": "pizza", - "name": "Pizza Delivery Experiment", + "description": "The times from the first race of the women’s 2 X 500-m speed skating times at the 2010 Winter Olympics in Vancouver, B.C. are given. 
", + "download": "https://dasl.datadescription.com/download/data/3530", + "filename": "speed-skating", + "name": "Winter Olympics 2010 speed skating", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "An experiment is conducted to compare the energy requirements of three physical activities: running, walking and bicycle riding. Eight subjects are asked to run, walk and bicycle a measured distance, and the number of kilocalories expended per kilometre is determined for each subject during each activity. The activities are run in random order with time for recovery between activities. Each activity was monitored exactly once for each individual. ", - "download": "http://www.statsci.org/data/general/energy.txt", - "filename": "energy", - "name": "Energy Requirements Running, Walking and Cycling", + "description": "Source: https://www.olympic.org/sochi-2014/alpine-skiing/slalom-men", + "download": "https://dasl.datadescription.com/download/data/3531", + "filename": "Winter-Olympics-2014", + "name": "Winter Olympics 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data set comprises the results of a saturated 215-11 fractional factorial with 4 observations per run. There were 15 controllable factors. The responses are the proportional shrinkage of four samples taken from 3000-foot lengths of speedometer cable manufactured at each set of conditions. The objective was to reduce the post-extrusion shrinkage of the speedometer casing. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nA\n\nline OD\n\nB\n\nliner die\n\nC\n\nliner material\n\nD\n\nliner line speed\n\nE\n\nwire braid type\n\nF\n\nbraiding tension\n\nG\n\nwire diameter\n\nH\n\nliner tension\n\nI\n\nliner temperature\n\nJ\n\ncosting material\n\nK\n\ncoating die type\n\nL\n\nmelt temperature\n\nM\n\nscreen pack\n\nN\n\ncooling method\n\nO\n\nline speed\n\ny1\n\nshrinkage value of first sample\n\ny2\n\nshrinkage value of second sample\n\ny3\n\nshrinkage value of third sample\n\ny4\n\nshrinkage value of fourth sample\n", - "download": "http://www.statsci.org/data/general/speedome.txt", - "filename": "speedome", - "name": "Speedometer-Cable Shrinkage", + "description": "53 men completed the men’s alpine downhill. The gold medal winner finished in 100.25 seconds. Here are the times (in seconds) for all competitors. ", + "download": "https://dasl.datadescription.com/download/data/3532", + "filename": "olympics-downhill", + "name": "Winter olympics 2018 downhill", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data are from a Proctor and Gamble study reported by Smith and Dubey (1964) on the amount of available chlorine in a product as a function of time since manufacture. Theoretical considerations lead to the model \nChlorine = a + (0.49 - a) exp{ -b (Weeks - 8) } \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nWeeks\n\nTime in weeks since manufacture\n\nChlorine\n\nAvailable chlorine\n", - "download": "http://www.statsci.org/data/general/chlorine.txt", - "filename": "chlorine", - "name": "Available Chlorine", + "description": "The women’s heptathlon in the Olympics consists of seven track-and-field events: the 200 m and 800 m runs, 100 m high hurdles, shot put, javelin, high jump, and long jump. 
Each contestant is awarded points for each event based on her performance. So, which performance deserves more points? It’s not clear how to compare them. They aren’t measured in the same units, or even in the same direction (longer jumps are better but shorter times are better.)", + "download": "https://dasl.datadescription.com/download/data/3536", + "filename": "Womens-Heptathlon", + "name": "Womens Heptathlon 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the normalized magnitudes of the voice data when the vowel 'ooh' was sung at a pitch of 290 Hz. A Kurzweil K2500 Sampler/Synthesizer was used to capture and to store the data. \nThe frequencies found in the signal can be used to identify the phonetical vowel, and are of interest in voice synthesis, therapy and training. Further details are given in Oliver (1997). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMagnitude\n\nNormalized Magnitudes at equi-spaced time intervals\n\n\n\n", - "download": "http://www.statsci.org/data/general/ooh.txt", - "filename": "ooh", - "name": "Voice Data from Singing the Vowel 'ooh'", + "description": "The Women’s 500 metres in short track speed skating at the 2018 Winter Olympics took place from 10 to 13 February 2018 at the Gangneung Ice Arena in Gangneung, South Korea. The defending champion from 2014, Li Jianrou, had retired, but the 2014 silver medalist Arianna Fontana competed and eventually won the event. 
", + "download": "https://dasl.datadescription.com/download/data/3537", + "filename": "Womens-short-track", + "name": "Womens short track 2018", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In studies aimed at characterising an author's style, samples of n words are taken and the number of function words in each sample counted. Often binomial or Poisson distributions are assumed to hold for the proportions of function words. The table shows the combined frequencies (x) of the articles \"the\", \"a\" and \"an\" in samples from Macauley's \"Essay on Milton\", taken from the Oxford edition of Macualey's (1923) literary essays. Non-overlapping samples were drawn from opening words of two randomly chosen lines from each of 50 pages of printed text, 10 word samples being simply extensions of 5 word samples. The data show clear evidence of underdispersion.", - "download": "http://www.statsci.org/data/oz/wdcount.txt", - "filename": "wdcount", - "name": "Underdispersed Word Counts", + "description": "The women’s 1500 metres speed skating competition for the 2006 Winter Olympics was held in Turin, Italy, on 22 February ", + "download": "https://dasl.datadescription.com/download/data/3538", + "filename": "Womens-speed-skating", + "name": "Womens speed skating 2006", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Sport" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Progressive Insurance asked customers who had been involved in auto accidents how far they were from home when the accident happened. 
", "download": "https://dasl.datadescription.com/download/data/3039", "filename": "accidents", "name": "Accidents", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "At a barbershop music singing competition, choruses are judged on three scales: Music (quality of the arrangement, etc.), Performance, and Singing. The scales are supposed to be independent of each other, and each is scored by a different judge, but a friend claims that he can predict a chorus’s singing score from the other two […] ", "download": "https://dasl.datadescription.com/download/data/3061", "filename": "Barbershop-music", "name": "Barbershop music", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "In 2016 13.27 million people attended a Broadway show, paying an average of more than $100 per ticket. The Broadway League, Inc. (https://www.broadwayleague.com/research/statistics-broadway-nyc/) provides some historical and current data. These variables are available for each year since the 1984-85 season: Season (The initial year of the season, so the 1984-85 season is 1984.) Gross ($M) […] ", "download": "https://dasl.datadescription.com/download/data/3087", "filename": "Broadway-shows", "name": "Broadway shows", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Fast food is often considered unhealthy because much of it is high in both fat and sodium. 
But are the two related? The data give the fat and sodium contents of several brands of burgers. ", "download": "https://dasl.datadescription.com/download/data/3088", "filename": "Burgers", "name": "Burgers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The dataset holds facts about candy bars read from their nutrition labels. The data are a good example for multiple regression (e.g. what contributes to the calories of a candy bar?). For such an analysis, the indicator variable for nuts appears to work well. Note that 5 sugar-free candy bars are marked as NA in […] ", "download": "https://dasl.datadescription.com/download/data/3092", "filename": "Candy-bars", "name": "Candy bars", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "In 1998, as an advertising campaign, the Nabisco Company announced a “1000 Chips Challenge,” claiming that every 18-ounce bag of their Chips Ahoy! cookies contained at least 1000 chocolate chips. Dedicated statistics students at the Air Force Academy randomly selected bags of cookies and counted the chocolate chips. The data report their counts. 
", "download": "https://dasl.datadescription.com/download/data/3110", "filename": "Chips-Ahoy", "name": "Chips Ahoy!", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The website rcdb.com, the Roller Coaster Database, holds facts about every roller coaster in the world, current or past. (If you know of one that is missing, please let the site master know.) These data are for recently opened coasters, most of which are still in operation.", "download": "https://dasl.datadescription.com/download/data/3118", "filename": "Coasters-2015", "name": "Coasters 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data are drawn from the work of O. M. Latter in 1902 and were used in a fundamental textbook on statistical quality control by L. H. C. Tippett (1902–1985), one of the pioneers in that field. \n", "download": "https://dasl.datadescription.com/download/data/3149", "filename": "Cuckoos-and-quality-control", "name": "Cuckoos and quality control", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data are from a production process that makes 250 units each hour. 
The data were collected over a normal 12-hour shift one day.", "download": "https://dasl.datadescription.com/download/data/3155", "filename": "Defect-monitoring", "name": "Defect monitoring", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data are from a production process that makes 250 units each hour. The data were collected over a normal 12-hour shift one day. ", "download": "https://dasl.datadescription.com/download/data/3156", "filename": "Defect-monitoring_", "name": "Defect monitoring second product", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Some students checked 6 bags of Doritos marked with a net weight of 28.3 grams. They carefully weighed the contents of each bag and recorded the weights in grams.", "download": "https://dasl.datadescription.com/download/data/3171", "filename": "Doritos", "name": "Doritos", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A student wants to investigate the effects of real vs.\nsubstitute eggs on his favorite brownie recipe. He enlists the\nhelp of 10 friends and asks them to rank each of 8 batches\non a scale from 1 to 10. Four of the batches were made with\nreal eggs, four with substitute eggs. 
The judges tasted the\nbrownies in random order.", "download": "https://dasl.datadescription.com/download/data/3185", "filename": "Eggs", "name": "Eggs", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Many people fear Friday the 13th as an unlucky day. Researchers looked into this to see whether there were differences in traffic or in admissions to hospitals for road accidents on Friday 13th when compared with the adjacent Friday 6th. ", "download": "https://dasl.datadescription.com/download/data/3219", "filename": "Friday-13-Accidents", "name": "Friday the 13th Accidents", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "As a project for an Introductory Statistics course, students checked 6 bags of Fritos marked with a net weight of 35.4 grams. They carefully weighed the contents of each bag, recording the weights (in grams):", "download": "https://dasl.datadescription.com/download/data/3222", "filename": "Fritos", "name": "Fritos", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The movie Harry Potter and the Sorcerer’s Stone opened as a great success. But every movie sees declining revenue over time. The dataset gives the daily revenues for the movie during its first 17 days. 
", "download": "https://dasl.datadescription.com/download/data/3256", "filename": "Harry-Potter-revenue", "name": "Harry Potter revenue", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Is the Statue of Liberty’s nose too long? Her nose measures 4′6″, but she is a large statue, after all. Her arm is 42 feet long. That means her arm is 42/4.5 = 9.3 times as long as her nose. Is that a reasonable ratio? The data give arm and nose lengths of 18 girls ", "download": "https://dasl.datadescription.com/download/data/3311", "filename": "Libertys-nose", "name": "Libertys nose", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Lottery numbers", "download": "https://dasl.datadescription.com/download/data/3318", "filename": "Lottery-numbers", "name": "Lottery numbers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Loyalty program", "download": "https://dasl.datadescription.com/download/data/3319", "filename": "Loyalty-program", "name": "Loyalty program", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Movie lengths 2010", "download": "https://dasl.datadescription.com/download/data/3348", "filename": 
"Movie-lengths-2010", "name": "Movie lengths 2010", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Students in an introductory statistics course were asked how many songs they had in their digital music library.", "download": "https://dasl.datadescription.com/download/data/3352", "filename": "Music-library", "name": "Music library", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "New York State inspectors assess all bridges in the state every two years including a bridge’s individual parts. Bridges are analyzed for their capacity to carry vehicular loads. Inspectors are required to evaluate, assign a condition score, and document the condition of up to 47 structural elements, including rating 25 components of each span of a bridge, in addition to general components common to all bridges. The NYSDOT condition rating scale ranges from 1 to 7, with 7 being in new condition and a rating of 5 or greater considered as good conditionBridges that cannot safely carry heavy vehicles, such as some tractor trailers, are posted with weight limits. Based upon inspection and load capacity analysis, any bridge deemed unsafe gets closed.\nHow does the condition of the bridge relate to its age? Are there any outliers? 
Can you account for them by identifying them?", "download": "https://dasl.datadescription.com/download/data/3364", "filename": "New-York-bridges-2016", "name": "New York bridges 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The GfK Roper Reports® Worldwide Survey asked 30,000 consumers in 23 countries about their attitudes on health, beauty, and other personal values. One question participants were asked was how important their personal appearance is to them. The data are a contingency table of responses to this question by age decade. ", "download": "https://dasl.datadescription.com/download/data/3392", "filename": "Personal-appearance", "name": "Personal appearance", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "BYU Human Performance Research Center http://www.byu.edu/chhp/intro.html#lrc Director: Mark Ricard 116A RB, (801) 378-8958", "download": "https://dasl.datadescription.com/download/data/3445", "filename": "Shirt-sizes", "name": "Shirt sizes", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A last is a form, traditionally made of wood, in the\nshape of the human foot. Lasts of various sizes are used by\nshoemakers to make shoes. In the United States, shoe sizes are\ndefined differently for men and women:\nU.S. men’s shoe size = (last size in inches * 3) – 24\nU.S. 
women’s shoe size = (last size in inches * 3) – 22.5\nBut in Europe, they are both: Euro size = last size in cm * 3/2\nThe data give the European shoe sizes of 269 college\nstudents (converted from their reported U.S. shoe sizes.)", "download": "https://dasl.datadescription.com/download/data/3447", "filename": "Shoe-Sizes", "name": "Shoe Sizes", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The dataset gives the heights (in inches) of 130 members of a choir and the part they sing. Note that Sopranos and Altos are typically women and Tenors and Basses are typically men. ", "download": "https://dasl.datadescription.com/download/data/3449", "filename": "Singers-by-parts", "name": "Singers by parts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Sugar is a major ingredient in many breakfast cereals. The data gives the sugar content as a percentage of weight for 49 brands of cereal. Data were collected from nutrition labels in a supermarket. ", "download": "https://dasl.datadescription.com/download/data/3467", "filename": "Sugar-cereal", "name": "Sugar in cereal", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give counts of 626 individuals categorized according to their “tattoo status” and their “hepatitis status.” Is there a relationship? 
", "download": "https://dasl.datadescription.com/download/data/3476", "filename": "Tattoos", "name": "Tattoos", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A bank is studying the time that it takes 6 of its tellers to serve an average customer. Customers line up in the queue and then go to the next available teller. Is there a difference? Can we pick out the best or worst performing teller? ", "download": "https://dasl.datadescription.com/download/data/3478", "filename": "Tellers", "name": "Tellers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Since 1994, the Best Roller Coaster Poll (www. ushsho.com/bestrollercoasterpoll.htm) has been ranking the world’s best roller coasters. In 2013, Bizarro dropped to 4th after earning the top steel coaster rank for six straight years. Data on the top 14 steel coasters from this poll are given. ", "download": "https://dasl.datadescription.com/download/data/3481", "filename": "Thrills-2013", "name": "Thrills 2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The Minnesota Department of Transportation\nhoped that they could measure the weights of big trucks without\nactually stopping the vehicles by using a newly developed\n“weight-in-motion” scale. To see if the new device was accurate,\nthey conducted a calibration test. 
They weighed several stopped\ntrucks (Static Weight) and assumed that this weight was correct.\nThen they weighed the trucks again while they were moving to\nsee how well the new scale could estimate the actual weight.", "download": "https://dasl.datadescription.com/download/data/3512", "filename": "Vehicle-weights", "name": "Vehicle weights", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Washing", "download": "https://dasl.datadescription.com/download/data/3515", "filename": "Washing", "name": "Washing", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Consumer Reports tested 11 brands of vanilla yogurt and found these numbers of calories per serving. 
", "download": "https://dasl.datadescription.com/download/data/3544", - "filename": "Yogurt_", - "name": "Yogurt", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Yogurt flavors", - "download": "https://dasl.datadescription.com/download/data/3545", - "filename": "Yogurt-flavors", - "name": "Yogurt flavors", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://jse.amstat.org/datasets/aptness.txt", - "download": "http://jse.amstat.org/datasets/aptness.dat.txt", - "filename": "Aptness", - "name": "Evaluating Aptness of a Regression Model", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In many statistical models the normal distribution of the response is an essential assumption.\nThis paper uses a dataset of 2000 euro coins with information (up to the milligram) about\nthe weight of each coin. As the physical coin production process is subject to a multitude\nof (very small) variability sources, it seems reasonable to expect that the empirical\ndistribution of the weight of euro coins does agree with the normal distribution. Goodness\nof fit tests however show that this is not the case. Moreover, some outliers complicate\nthe analysis. 
Mixtures of normal distributions and skew normal distributions are fitted\nto the data, revealing that the normality assumption might not hold for those weights.", - "download": "http://jse.amstat.org/datasets/euroweight.dat.txt", - "filename": "euroweight", - "name": "The Weight of Euro Coins ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Title, year of release, length in minutes, number of cast members listed, rating, and number of lines \nof description are recorded for a simple random sample of 100 movies. One can use the sample to obtain base-line information \non the movie guide from which the data were collected. The dataset also illustrates two paradoxes for associations between \nthree variables: non-transitivity of positive correlation and Simpson's paradox. SOURCE: The data were taken as a simple \nrandom sample of the approximately 19,000 movies (not including made-for-TV movies) in Leonard Maltin's Movie and Video \nGuide, 1996. ", - "download": "http://jse.amstat.org/datasets/films.dat.txt", - "filename": "films", - "name": "films dataset", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset contains descriptive data of contestants on the game shoe \"Friend or Foe?\". Information on the contestant's \nrace, sex, age, prize money, and playing strategy are included. 
", - "download": "http://jse.amstat.org/datasets/friend_or_foe.dat.txt", - "filename": "friend_or_foe", - "name": "Data from the Television Game Show \"Friend or Foe?\"", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset contains hat size as well as circumference, length of major axis \nand length of minor axis of the inner hat band for 26 hats. The manufacturer \nand the country of manufacture are also included.", - "download": "http://jse.amstat.org/datasets/hats.dat.txt", - "filename": "hats", - "name": " Hat measurements, including hat size", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset consists of samples of size six taken without replacement\nfrom the integers {1, 2, 3, ..., 42}. There are actually three\ndatasets from three different sources, and in each case the six-tuples\nare (in theory) random selections or samples. 
The observations in each\nsample are given in the order in which they were obtained or selected.", - "download": "http://jse.amstat.org/datasets/lotto.dat.txt", - "filename": "lotto", - "name": "Lotto 6/42 Selections from Individuals, Irish National Lottery, and S-Plus Simulation", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This file contains daily per theater box office receipts for 49 \nmovies. This data is to accompany the article entitled Movie Data.", - "download": "http://jse.amstat.org/datasets/moviedaily.dat.txt", - "filename": "moviedaily", - "name": "moviedaily", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Every year actors and actresses are chosen to receive the Oscars awards for best actor and for best actress. 
This dataset \ncontains information about each of the winners for each of the 77 annual Oscar awards.\n\nAlthough there have been only 77 Oscars, there are 78 male winners and 78 female winners because ties happened on two \noccasions (1933 for the best actor and 1969 for the best actress).\n\nVARIABLE DESCRIPTIONS:\n\nColumns Variables\n 1 Gender (m=male f=female)\n 3-4 Oscar Year Number (1-77)\n 6-9 Year the Oscar Took Place\n 11-29 Winner’s first and last name\n 31-60 Name of the Movie in which the winner acted\n 62-63 Age of winner (at the beginning of the winning year)\n 65-77 Birth place (State if born in USA, else Country)\n 79-80 Month in which the winner was born\n 82-83 Day of month on which winner was born\n 85-88 Year the winner was born", - "download": "http://jse.amstat.org/datasets/oscars.dat.txt", - "filename": "oscars", - "name": "Oscars: Best Actors and Actresses", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset contains information collected from rolling the pair of\npigs (found in the game \"Pass the Pigs\") 6000 times. A description of\nthe rules, scoring configurations, and data collection method are\nincluded in the accompanying paper.", - "download": "http://jse.amstat.org/datasets/pig.dat.txt", - "filename": "pig", - "name": "Data from the game \"Pass the Pigs\"", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In a residential home, energy consumption is closely related to the\noutdoor temperature and size of the house. 
In a home of a given size,\ntemperature fluctuations and energy consumption vary fairly predictably\nover time. When homeowners add a room, other things being equal,\nutility usage should increase. This dataset permits students to\nestimate the energy demand and make forecasts for future months, as\nwell as explore other relationships.\n\nThe dataset contains natural gas and electricity usage data for a\ngas-heated single-family residence in the Boston area from September\n1990 through May 1997, accompanied by monthly climatological data. \nThe dataset is useful for illustrating the concepts and techniques of\ncentral tendency, dispersion, elementary time series analysis,\ncorrelation, simple and multiple regression, and variable\ntransformations.", - "download": "http://jse.amstat.org/datasets/utility.dat.txt", - "filename": "utility", - "name": "What Does It Take to Heat a New Room? ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Other" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the number of deaths in prison custody in Australia in each of the six years 1990 to 1995, given separately for Aboriginal and Torres Strait Islanders (indigenous) and others (non-indigenous). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1990 through 1995\n\nIndigenous\n\nYes = Aboriginal or Torres Strait Islander, No = Non-indigenous\n\nPrisoners\n\nTotal number in prison custody\n\nDeaths\n\nNumber of deaths in prison custody\n\nPopulation\n\nAdult population (15+ years)\n\n\n\n\nThe data were collected in response to the Royal Commission into Aboriginal Deaths in Custody, the final report of which was tabled in the Federal Parliament on the 9 May 1991. \nThe report of the Royal Commission has two streams. 
One is concerned with the ninety-nine Aboriginal and Torres Strait Islander deaths in custody which occurred throughout Australia during the period 1 January 1980 to 31 May 1989. Issues around the causes of death, culpability of custodians and their employers, and the prevention of future deaths were addressed in depth. The second stream concerned what the Royal Commission called the ‘underlying issues’: the social, cultural, and legal factors which, in the view of the Commissioners, had some bearing on the deaths. These underlying issues, as revealed from the chapter headings of the Royal Commission’s National Report, included the Legacy of History, Aboriginal Society Today, Relations With the Non-Aboriginal Community, The Harmful Use of Alcohol and Other Drugs, Schooling, Employment, Unemployment and Poverty, Housing and Infrastructure, Land Needs, and Self-determination. \nThe link between the Royal Commission’s discussion of the individual deaths investigated, the prevention of future deaths and the underlying issues, is its position on the over-representation of Indigenous people in custody in Australia. A central conclusion of the Royal Commission, illustrating this point, was as follows: \nThe work of the commission has established that Aboriginal people in custody do not die at a greater rate than non-Aboriginal people in custody. \nHowever, what is overwhelming different is the rate at which Aboriginal people come into custody, compared with the rate of the general community ... The ninety-nine who died in custody illustrate that over-representation and, in a sense, are the victims of it. \nThe conclusions are clear. Aboriginal people die in custody at a rate relevant to their proportion of the whole population which is totally unacceptable and which would not be tolerated if it occurred in the non-Aboriginal community. 
But this occurs not because Aboriginal people in custody are more likely to die than others in custody, but because the Aboriginal population is grossly over-represented in custody. Too many Aboriginal people are in custody too often (Johnston, 1991, Vol 1, p6).", - "download": "http://www.statsci.org/data/oz/custody.txt", - "filename": "custody", - "name": "Aboriginal Deaths in Custody", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Facts on the countries of Asia. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCountry \n\nName\n\nArea \n\nTotal area (sq km)\n\nPopulation \n\nPopulation July 1995 est.\n\nLife \n\nLife Expectancy 1995 est. (years)\n\nGDP \n\nGDP 1994 (US$ billions)\n\nGDP/caput \n\nGDP per person 1994 est (US$)\n\n\n\n", - "download": "http://www.statsci.org/data/oz/asia.txt", - "filename": "asia", - "name": "Countries of Asia", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The United States Census Bureau keeps track of the number of adoptions in each State (and Washington D.C.). The data includes the population of each state as well. How should adoptions be summarized and displayed? 
", - "download": "https://dasl.datadescription.com/download/data/3043", - "filename": "Adoptions", - "name": "Adoptions", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3177", - "filename": "Drivers-Licenses-2014", - "name": "Drivers Licenses 2014", + "filename": "Yogurt_", + "name": "Yogurt", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Do flexible work schedules reduce the demand for resources? The Lake County, Illinois, Health Department experimented with a flexible four-day workweek. For a year, the department recorded the mileage driven by 11 field workers on an ordinary five-day workweek. Then it changed to a flexible four-day workweek and recorded mileage for another year. ", - "download": "https://dasl.datadescription.com/download/data/3540", - "filename": "Work-week", - "name": "Work week", + "description": "Yogurt flavors", + "download": "https://dasl.datadescription.com/download/data/3545", + "filename": "Yogurt-flavors", + "name": "Yogurt flavors", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], - "subcategory_name": "Administration" + "subcategory_name": "Other" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the survival status of passengers on the Titanic, together with their names, age, sex and passenger class. 
\nAbout half of the ages for the 3rd Class passengers are missing, although a good many of these could be filled in from the original source below. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName\n\nRecorded name of passenger\n\nPClass\n\nPassenger class: 1st, 2nd or 3rd\n\nAge\n\nAge in years\n\nSex\n\nmale or female\n\nSurvived\n\n1 = Yes, 0 = No\n\n\n\n", - "download": "http://www.statsci.org/data/general/titanic.txt", - "filename": "titanic_", - "name": "Passengers on the Titanic", + "description": "The data give the number of deaths in prison custody in Australia in each of the six years 1990 to 1995, given separately for Aboriginal and Torres Strait Islanders (indigenous) and others (non-indigenous). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1990 through 1995\n\nIndigenous\n\nYes = Aboriginal or Torres Strait Islander, No = Non-indigenous\n\nPrisoners\n\nTotal number in prison custody\n\nDeaths\n\nNumber of deaths in prison custody\n\nPopulation\n\nAdult population (15+ years)\n\n\n\n\nThe data were collected in response to the Royal Commission into Aboriginal Deaths in Custody, the final report of which was tabled in the Federal Parliament on the 9 May 1991. \nThe report of the Royal Commission has two streams. One is concerned with the ninety-nine Aboriginal and Torres Strait Islander deaths in custody which occurred throughout Australia during the period 1 January 1980 to 31 May 1989. Issues around the causes of death, culpability of custodians and their employers, and the prevention of future deaths were addressed in depth. The second stream concerned what the Royal Commission called the ‘underlying issues’: the social, cultural, and legal factors which, in the view of the Commissioners, had some bearing on the deaths. 
These underlying issues, as revealed from the chapter headings of the Royal Commission’s National Report, included the Legacy of History, Aboriginal Society Today, Relations With the Non-Aboriginal Community, The Harmful Use of Alcohol and Other Drugs, Schooling, Employment, Unemployment and Poverty, Housing and Infrastructure, Land Needs, and Self-determination. \nThe link between the Royal Commission’s discussion of the individual deaths investigated, the prevention of future deaths and the underlying issues, is its position on the over-representation of Indigenous people in custody in Australia. A central conclusion of the Royal Commission, illustrating this point, was as follows: \nThe work of the commission has established that Aboriginal people in custody do not die at a greater rate than non-Aboriginal people in custody. \nHowever, what is overwhelming different is the rate at which Aboriginal people come into custody, compared with the rate of the general community ... The ninety-nine who died in custody illustrate that over-representation and, in a sense, are the victims of it. \nThe conclusions are clear. Aboriginal people die in custody at a rate relevant to their proportion of the whole population which is totally unacceptable and which would not be tolerated if it occurred in the non-Aboriginal community. But this occurs not because Aboriginal people in custody are more likely to die than others in custody, but because the Aboriginal population is grossly over-represented in custody. 
Too many Aboriginal people are in custody too often (Johnston, 1991, Vol 1, p6).", + "download": "http://www.statsci.org/data/oz/custody.txt", + "filename": "custody", + "name": "Aboriginal Deaths in Custody", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "For each of ten streets with bike lanes, investigators measured the distance between the centre line and a cylist in the bike lane. They used photography to determine the distance between the cyclist and a passing car on those same ten streets, recording all distances in feet. \n", - "download": "http://www.statsci.org/data/general/cyclist.txt", - "filename": "cyclist", - "name": "Distance of Cars from Cyclists", + "description": "Facts on the countries of Asia. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCountry \n\nName\n\nArea \n\nTotal area (sq km)\n\nPopulation \n\nPopulation July 1995 est.\n\nLife \n\nLife Expectancy 1995 est. (years)\n\nGDP \n\nGDP 1994 (US$ billions)\n\nGDP/caput \n\nGDP per person 1994 est (US$)\n\n\n\n", + "download": "http://www.statsci.org/data/oz/asia.txt", + "filename": "asia", + "name": "Countries of Asia", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Hourly carbon monoxide (CO) averages were recorded on summer weekdays at a measurement station in Los Angeles. The station was established by the Environmental Protection Agency as part of a larger study to assess the effectiveness of the catalytic converter. It was located about 25 feet from the San Diego Freeway, which in this particular area is located at 145 degrees north. 
It was located such that winds from 145 to 325 degress (which in the summer are the prevalent wind directions during the daylight hours) transport the CO emissions from the highway toward the measurement station. Aggregate measurements were recored for each hour of the day 1 to 24. \nHour \n- \nhour of the day, from midnight to midnight \nCO \n- \naverage summer weekday CO concentration (parts per million) \nTD \n- \naverage weekday traffic density (traffic count/traffic speed) \nWS \n- \naverage perpendicular wind-speed component,\nwind speed x cos(wind direction - 235 degrees) \n\nIt would be interesting to have wind speed and direction recorded separately. ", - "download": "http://www.statsci.org/data/general/cofreewy.txt", - "filename": "cofreewy", - "name": "Carbon Monoxide from a Freeway", + "description": "The United States Census Bureau keeps track of the number of adoptions in each State (and Washington D.C.). The data includes the population of each state as well. How should adoptions be summarized and displayed? ", + "download": "https://dasl.datadescription.com/download/data/3043", + "filename": "Adoptions", + "name": "Adoptions", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This is a highly fractionated two-level factorial design employed as a screening design in an off-line welding experiment performed by the National Railway Corporation of Japan. There were 16 runs and 9 experimental factors. The response variable is the observed tensile strength of the weld, one of several quality characteristics measured. All other variables are at plus and minus levels. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nRods\n\nKind of welding rods\n\nDrying\n\nPeriod of drying\n\nMaterial\n\nWelded material\n\nThickness\n\nThickness\n\nAngle\n\nAngle\n\nOpening\n\nOpening\n\nCurrent\n\nCurrent\n\nMethod\n\nWelding method\n\nPreheating\n\nPreheating\n\nStrength\n\nTensile strength of the weld in kg/mm\n", - "download": "http://www.statsci.org/data/general/welding.txt", - "filename": "welding", - "name": "Tensile Strength of Welds", + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3177", + "filename": "Drivers-Licenses-2014", + "name": "Drivers Licenses 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Military pilots sometimes black out when their brains are deprived of oxygen due to G-forces during violent maneuvers. Glaister and Miller (1990) produced similar symptoms by exposing volunteers’ lower bodies to negative air pressure, likewise decreasing oxygen to the brain. The data lists the subjects' ages and whether they showed syncopal blackout related signs (pallor, sweating, slow heartbeat, unconsciousness) during an 18 minute period. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nInitials of the subject's name\n\nAge\n\nSubject's age in years\n\nSigns\n\nWhether subject showed blackout-related signs (0=No, 1=Yes)\n", - "download": "http://www.statsci.org/data/general/gforces.txt", - "filename": "gforces", - "name": "G-Induced Loss of Consciousness", + "description": "Do flexible work schedules reduce the demand for resources? The Lake County, Illinois, Health Department experimented with a flexible four-day workweek. For a year, the department recorded the mileage driven by 11 field workers on an ordinary five-day workweek. 
Then it changed to a flexible four-day workweek and recorded mileage for another year. ", + "download": "https://dasl.datadescription.com/download/data/3540", + "filename": "Work-week", + "name": "Work week", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Administration" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Commercial airlines overbook flights, selling more tickets than they have seats, because a sizeable number of reservation holders don’t show up in time for their flights. But sometimes, there are more passengers wishing to board than there are seats. Most airlines try to entice travelers to voluntarily give up their seats in return for free […] ", "download": "https://dasl.datadescription.com/download/data/3048", "filename": "Airline-bumping", "name": "Airline bumping 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "https://www.albany.edu/sourcebook/csv/t3177.csv adapted from: U.S. Department of Transportation, Federal Aviation Administration, Semiannual Report to Congress on the Effectiveness of the Civil Aviation Security Program, July 1 to \"December 31, 1978, Exhibit 10; July 1 to December 31, 1982, Exhibit 10; July 1 to December 31, 1984, Exhibit 7; July 1 to December 31, 1989, p. 11 (Washington, DC: U.S. Department of Transportation); U.S. Department of Transportation, Federal Aviation Administration, Annual Report to Congress on Civil Aviation Security, January 1, 1993-December 31, 1993, p. 9; January 1, 1995-December 31, 1995, p. 11 (Washington, DC: U.S. 
Department of Transportation); and data provided by the U.S. Department of Transportation, Federal Aviation Administration and Bureau of Transportation Statistics [Online]. Available: http://www.bts.gov/publications/national_transportation_statistics/ 2003/html/table_02_16.html [May 24, 2004]. Table adapted by SOURCEBOOK staff.", "download": "https://dasl.datadescription.com/download/data/3049", "filename": "Airport-screening", "name": "Airport screening", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The Bicycle Helmet Safety Institute website includes a report on the number of bicycle fatalities per year in the United States. The data gives the counts for the years 1994–2015. ", "download": "https://dasl.datadescription.com/download/data/3073", "filename": "Bike-safety-2015", "name": "Bike safety 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The dataset is the number of camp sites at each of the public parks in Vermont ", "download": "https://dasl.datadescription.com/download/data/3091", "filename": "Camp-sites", "name": "Camp sites", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the number of domestic U.S. 
flights flown in each year from 2000 to 2016 ", "download": "https://dasl.datadescription.com/download/data/3209", "filename": "Flights-2016", "name": "Flights 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The Bureau of Transportation Statistics of the U.S. Department of Transportation publishes information about airline performance. The data report the percentage of flights departing on time each month from January 1994 through June 2016. ", "download": "https://dasl.datadescription.com/download/data/3210", "filename": "Flights-on-time-2016", "name": "Flights on time 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Many people fear Friday the 13th as an unlucky day. Researchers looked into this to see whether there were differences in traffic or in admissions to hospitals for road accidents on Friday 13th when compared with the adjacent Friday 6th. ", "download": "https://dasl.datadescription.com/download/data/3220", "filename": "Friday-13-traffic", "name": "Friday the 13th traffic", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Many drivers of cars that can run on regular gas actually buy premium in the belief that they will get better gas mileage. To test that belief, we use 10 cars from a company fleet in which all the cars run on regular gas. 
Each car is filled first with either regular or premium gasoline, decided by a coin toss, and the mileage for that tankful is recorded. Then the mileage is recorded again for the same cars for a tankful of the other kind of gaso-line. We don’t let the drivers know about this experiment.", "download": "https://dasl.datadescription.com/download/data/3230", "filename": "Gasoline__", "name": "Gasoline", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Much of the public and private industry in Hawaii depends on tourism. The following time series plot shows the number of domestic visitors to Hawaii by air from the rest of the United States per month from January 2002 through December 2006 before the financial crisis of 2008. ", "download": "https://dasl.datadescription.com/download/data/3257", "filename": "Hawaii-tourism", "name": "Hawaii tourism", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data report the percentage of flights that were late and the percentage that departed on time for each month from 1995 through early 2016 ", "download": "https://dasl.datadescription.com/download/data/3309", "filename": "Late-arrivals-2016", "name": "Late arrivals 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The Research and Innovative Technology Administration of the Bureau of Transportation Statistics reports load 
factors (passenger-miles as a percentage of available seat miles) for commercial airlines for every month from October 2002 through 2017 for both domestic and international flights. ", "download": "https://dasl.datadescription.com/download/data/3315", "filename": "Load-factors-2016", "name": "Load factors 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The Research and Innovative Technology Administration of the Bureau of Transportation Statistics reports load factors (passenger-miles as a percentage of available seat miles) for commercial airlines for every month from October 2002 through 2017 for both domestic and international flights. ", "download": "https://dasl.datadescription.com/download/data/3316", "filename": "Load-factors-2017", "name": "Load factors 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the number of passengers at Oakland (CA) airport month by month since 1997. ", "download": "https://dasl.datadescription.com/download/data/3371", "filename": "Oakland-passengers-2016", "name": "Oakland passengers 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The National Highway Traffic Safety Administration reports seat belt use and fatalities in car accidents by state. How do fatalities relate to seat belt use? 
", "download": "https://dasl.datadescription.com/download/data/3442", "filename": "Seat-belts-2015", "name": "Seat belts 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data report the density (cars per mile) and average speed of traffic on city highways. The data were collected at the same location at 10 different times randomly selected within a span of 3 months. ", "download": "https://dasl.datadescription.com/download/data/3560", "filename": "Speed-density", "name": "Speed and density", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A tire manufacturer tested the braking performance of one of its tire models on a test track. The company tried the tires on 10 different cars, recording the stopping distance for each car on both wet and dry pavement. ", "download": "https://dasl.datadescription.com/download/data/3460", "filename": "Stopping-distance", "name": "Stopping distance", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A tire manufacturer tested the braking performance of one of its tire models on a test track. The company tried the tires on 10 different cars, recording the stopping distance for each car on both wet and dry pavement from 60 miles per hour. The test was run on both dry and wet pavement. 
(The actual braking distance takes into account the driver’s reaction time, which typically adds nearly 300 feet at 60 mph!)", "download": "https://dasl.datadescription.com/download/data/3461", "filename": "Stopping-distance-60", "name": "Stopping distance 60", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Traffic fatalities in a variety of vehicles and for a variety of situations for the years from 1975 to 2013. These are multiple time series, but can also be related to each other. ", "download": "https://dasl.datadescription.com/download/data/3495", "filename": "Traffic-fatalities", "name": "Traffic fatalities 2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The U.S. Energy Information Administration (EIA) collects data on the total energy used per capita in transportation for each state and the District of Columbia. The data show the per capita consumption in the year 2015 in millions of BTU per person. ", "download": "https://dasl.datadescription.com/download/data/3496", "filename": "Transportation-Energy", "name": "Transportation Energy use", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "U.S. Department of Transportation reports records of border crossings into each state on the U.S. border. Here are the border crossings by trucks for Alaska, recorded each month from 1999 through 2017. 
", "download": "https://dasl.datadescription.com/download/data/3499", "filename": "Trucks_", "name": "Trucks", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://jse.amstat.org/datasets/airport.txt", - "download": "http://jse.amstat.org/datasets/airport.dat.txt", - "filename": "US-Airport-Statistics", - "name": " US Airport Statistics", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data is a set of 50000 (1.3 MB ) observations containing roughly 2 minutes of traffic from the one hour, larger \ndec-pkt-1.tcp file used in the paper. The larger file can be accessed from the author's web page or from its source. 
With \nonly 50000 observations, the data set ", - "download": "http://jse.amstat.org/datasets/packetdata.dat.txt", - "filename": "packetdata", - "name": "packetdata", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset consists of a listing of all US interstate\nhighways, treating the highway as the sampling unit.\n\nVARIABLE DESCRIPTIONS:\nInterstate # Columns 1-2\nNumber of states Columns 4-5\nApproximate miles Columns 7-10\nSouthern or Western end Columns 12-34\nNorthern or Eastern end Columns 36-57", - "download": "http://jse.amstat.org/datasets/ushighway1.dat.txt", - "filename": "ushighway1", - "name": " US INTERSTATE SYSTEM I", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset consists of a listing of all US interstate\nhighways, treating the highway/state combination \nas the sampling unit. Three principal cities through which the\nhighway runs are given for each state. 
Therefore, a single highway \nmay have several observations, one observation for each state.", - "download": "http://jse.amstat.org/datasets/ushighway2.dat.txt", - "filename": "ushighway2", - "name": "US INTERSTATE SYSTEM II", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset consists of a listing of all US 3-digit interstate\nhighways (spurs and connecters), treating the highway/state\ncombination as the sampling unit.", - "download": "http://jse.amstat.org/datasets/ushighway3.dat.txt", - "filename": "ushighway3", - "name": "US INTERSTATE SYSTEM III", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false } ], "subcategory_name": "Travel" }, { "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the number of deaths cuased by firearms in Australia from 1983 to 1997, expressed as a rate per 100,000 of population. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\nYear\n\nRate \n\nNumber of deaths caused by firearms per 100,000 population\n", - "download": "http://www.statsci.org/data/oz/firearms.txt", - "filename": "firearms", - "name": "Deaths Caused by Firearms", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Criminologists are interested in the effect of punishment regimes on crime rates. 
This has been studied using aggregate data on 47 states of the USA for 1960. The data set contains the following columns: \n \nVariable\n \nDescription\n\nM\n\npercentage of males aged 14–24 in total state population\n\nSo\n\nindicator variable for a southern state \n\nEd\n\nmean years of schooling of the population aged 25 years or over\n\nPo1\n\nper capita expenditure on police protection in 1960 \n\nPo2\n\nper capita expenditure on police protection in 1959 \n\nLF\n\nlabour force participation rate of civilian urban males in the age-group 14-24\n\nM.F\n\nnumber of males per 100 females \n\nPop\n\nstate population in 1960 in hundred thousands\n\nNW\n\npercentage of nonwhites in the population \n\nU1\n\nunemployment rate of urban males 14–24 \n\nU2\n\nunemployment rate of urban males 35–39 \n\nWealth\n\nwealth: median value of transferable assets or family income\n\nIneq\n\nincome inequality: percentage of families earning below half the median income\n\nProb\n\nprobability of imprisonment: ratio of number of commitments to number of offenses\n\nTime\n\naverage time in months served by offenders in state prisons before their first release\n\nCrime\n\ncrime rate: number of offenses per 100,000 population in 1960\n", - "download": "http://www.statsci.org/data/general/uscrime.txt", - "filename": "uscrime", - "name": "Effect of Punishment Regimes on Crime Rates", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A number of homicide incidents in Australia have involved multiple killings. A multiple killing is defined as any incident where two or more persons are murdered. According to available literature, there have been 24 multiple killings by firearm between 1987 and 1996. These resulted in 128 deaths. 
The data give the number of multiple killings which have been recorded for the period 1987 to 28 April 1996. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\n1987 - 1996.\n\nIncidents \n\nNumber of multiple killings\n\nDeaths \n\nTotal number of deaths\n\n\n\n\nThe data for the year 1996 include killings only up to and including 28 April.", - "download": "http://www.statsci.org/data/oz/multkill.txt", - "filename": "multkill", - "name": "Multiple Killings Committed with a Firearm", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A survey was conducted in the United States and 10 countries of Western Europe to determine the percentage of teenagers who had used marijuana and other drugs. The data give percentages of drug use by country. ", "download": "https://dasl.datadescription.com/download/data/3178", "filename": "Drug-abuse", "name": "Drug abuse", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The 2013 World Drug Report investigated the prevalence of drug use as a percentage of the population aged 15 to 64. 
Data from 32 European countries are shown.", "download": "https://dasl.datadescription.com/download/data/3179", "filename": "Drug-use-2013", "name": "Drug use 2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Prisons 2014", "download": "https://dasl.datadescription.com/download/data/3406", "filename": "Prisons-2014", "name": "Prisons 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Crime" }, { "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "An individual's critical flicker frequency is the highest frequency at which the flicker in a flickering light source can be detected. At frequencies above the critical frequency, the light source appears to be continuous even though it is actually flickering. This investigation recorded critical flicker frequency and iris colour of the eye for 19 subjects. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nColour\n\nEye colour: Brown, Green or Blue\n\nFlicker\n\nCritical flicker frequency in cycles/sec\n", - "download": "http://www.statsci.org/data/general/flicker.txt", - "filename": "flicker", - "name": "Eye Colour and Flicker Frequency", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data are a random sample from the data in Population commute times.", "download": "https://dasl.datadescription.com/download/data/3123", "filename": "Commute-times-sample100", "name": "Commute times sample100", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3137", - "filename": "Couples", - "name": "Couples", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data give the mortality rate (deaths per 100,000 people) and the education level (average number of years in school) for 58 U.S. cities. 
", - "download": "https://dasl.datadescription.com/download/data/3183", - "filename": "Education-and-mortality", - "name": "Education and mortality", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Students in a large statistics class were asked to report the eye color and hair color. Is there an association? ", - "download": "https://dasl.datadescription.com/download/data/3197", - "filename": "Eye-and-Hair-color", - "name": "Eye and Hair color", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Eurostat, an agency of the European Union (EU), conducts surveys on several aspects of daily life in EU countries. Recently, the agency asked samples of 1000 respondents in each of 14 European countries whether they read the newspaper on a daily basis. 
", - "download": "https://dasl.datadescription.com/download/data/3363", - "filename": "Newspapers", - "name": "Newspapers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Population Commute Times", - "download": "https://dasl.datadescription.com/download/data/3401", - "filename": "Population-Commute", - "name": "Population Commute Times", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "the percentage change in population for the 50 states and the District of Columbia from the 2000 census to the 2010 census. ", - "download": "https://dasl.datadescription.com/download/data/3402", - "filename": "Population-growth-2010", - "name": "Population growth 2010", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Crowd Management Strategies monitors accidents at rock concerts. In their database, they list the names and other variables of victims whose deaths were attributed to “crowd crush” at rock concerts. 
The data give the victims’ ages for data from a one-year period: ", - "download": "https://dasl.datadescription.com/download/data/3429", - "filename": "Rock-concert-deaths", - "name": "Rock concert deaths", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study at a liberal arts college attempted to find out whether men and women watch the same amount of TV, on average and whether it mattered if students were varsity athletes or not. Student researchers asked 200 randomly selected students questions about their backgrounds and about their television-viewing habits and received 197 legitimate responses. The researchers found that men watch, on average, about 2.5 hours per week more TV than women, and that varsity athletes watch about 3.5 hours per week more than those who are not varsity athletes. But is this the whole story? To investigate further, they divided the students into four groups: male athletes (MA), male non-athletes (MNA), female\nathletes (FA), and female non-athletes (FNA).", - "download": "https://dasl.datadescription.com/download/data/3504", - "filename": "TV-watching", - "name": "TV watching", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Insurance companies and other organizations use actuarial tables to estimate the remaining lifespans of their customers. 
The data file gives estimated life expectancy and additional years of life for black males in the United States, according to a 2016 National Vital Statistics Report, A regression model to predict Life expectancy from Age appears to fit well, but consider the residuals.", - "download": "https://dasl.datadescription.com/download/data/3542", - "filename": "Years-to-live", - "name": "Years to live 2016", + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "-", + "download": "https://dasl.datadescription.com/download/data/3137", + "filename": "Couples", + "name": "Couples", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Fortune magazine collected the zodiac signs of 256 heads of the largest 400 companies. The data shows the number of births for each sign. ", - "download": "https://dasl.datadescription.com/download/data/3547", - "filename": "Zodiac", - "name": "Zodiac", + "description": "Data give the mortality rate (deaths per 100,000 people) and the education level (average number of years in school) for 58 U.S. cities. 
", + "download": "https://dasl.datadescription.com/download/data/3183", + "filename": "Education-and-mortality", + "name": "Education and mortality", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "For each of the forty largest countries in the world (according to 1990\npopulation figures), data are given for the country's life expectancy\nat birth, number of people per television set, and number of people per\nphysician.", - "download": "http://jse.amstat.org/datasets/televisions.dat.txt", - "filename": "televisions", - "name": "Televisions, Physicians, and Life Expectancy", + "description": "Students in a large statistics class were asked to report the eye color and hair color. Is there an association? ", + "download": "https://dasl.datadescription.com/download/data/3197", + "filename": "Eye-and-Hair-color", + "name": "Eye and Hair color", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch12.dat contains the following variables:\n\nlstay: Length of stay of a resident\nage: Age of a resident\ntrt: Nursing home assignment (1: receive treament,0: control)\ngender: Gender (1:male,0:female)\nmarstat: Marital status (1: married,0: not married)\nhlstat: Health status (2: second best, 5: worst)\ncens: Censoring indicator (1:censored, 0: discharged)", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch12.dat", - "filename": "Nursing-Home-Usage", - "name": "\nParametric Duration Analysis of Nursing Home Usage", + "description": "Eurostat, an agency of the European Union (EU), conducts surveys on 
several aspects of daily life in EU countries. Recently, the agency asked samples of 1000 respondents in each of 14 European countries whether they read the newspaper on a daily basis. ", + "download": "https://dasl.datadescription.com/download/data/3363", + "filename": "Newspapers", + "name": "Newspapers", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This data set was derived from sample survey data collected in 1988\nin two surveys designed to evaluate the City of Toronto Workplace\nSmoking By-law (National Health Research and Development Program,\nCanada, Project Grant 6606-3346-46). The principal investigator\nwas Dr. L.L. Pederson, University of Western Ontario, Ontario,\nCanada. The surveys were conducted by the Institute for Social\nResearch at York University, Ontario, Canada in January-February\n1988 and in November-December 1988. By agreement with the\nInstitute for Social Research, York University, the survey data are\nin the public domain. This data set can be used freely for\nnoncommercial purposes and can be freely distributed.\n\nThere are 15 variables in the data set, with values separated by\nblanks. There are no missing values. The CSB variable names are as\nfollows: \n\nidno y w x1 x2 x3 z1 z2 z3 z4 z5 z6 z7 z8 z9\n\n\nSHORT DESCRIPTION NAME DEFINITION AND CODING\n\nUnique identifier idno (5 digits, beginning with 1 or 2)\n\nOutcome y Attitude toward smoking in the\n workplace. 
Smoking should be: \n (1 = prohibited, 2 = restricted,\n 0 = unrestricted)\n\nWeight w Sampling/post-stratification weight\n (ranges from 0.305 to 4.494)\n\nTime x1 Time of survey relative to\n implementation of the by-law \n on March 1, 1988\n (1 = post, 0 = pre)\n\nWork x2 Place of work indicator 1\n with City of Toronto as baseline\n (1 = outside City of Toronto,\n 0 = otherwise)\n\n x3 Place of work indicator 2\n with City of Toronto as baseline\n (1 = not outside the home, \n 0 = otherwise)\n\nResidence z1 Place of residence\n (1 = City of Toronto, \n 0 = other Metro Toronto)\n\nSmoking z2 Smoking status indicator 1\n with those who have never smoked \n as the baseline\n (1 = current smoker, \n 0 = otherwise)\n\n z3 Smoking status indicator 2\n with never as the baseline\n (1 = quit <=6 months ago, \n 0 = otherwise)\n\n z4 Smoking status indicator 3\n with never as the baseline\n (1 = quit >6 months ago, \n 0 = otherwise)\n\n z5 Smoking status indicator 4\n with quit >12 months as the baseline\n (1 = quit 6-12 months, \n 0 = otherwise)\n\nKnowledge z6 Knowledge of health effects of\n environmental tobacco smoke\n (score, ranges from 0 to 12)\n\nSex z7 Sex of respondent\n (1 = male, 0 = female)\nAge z8 Age of respondent\n ( (age in years - 50)/10 )\n\nEducation z9 Level of education\n (-2 = elementary, \n -1 = some high school, \n 0 = high or trade school, \n 1 = college or some university,\n 2 = university degree)\n ", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch13.dat", - "filename": "Smoking-Restrictions", - "name": "Analysis of Attitudes Towards Workplace Smoking Restrictions", + "description": "Population Commute Times", + "download": "https://dasl.datadescription.com/download/data/3401", + "filename": "Population-Commute", + "name": "Population Commute Times", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - 
} - ], - "subcategory_name": "Population" - }, - { - "datasets": [ + "use_first_row_for_vectorname": true + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data were collected as part of a time study for Telecom, now known as Telstra. The purpose if the study was to model the total hours worked in a section of Telecom in terms of the counts of various tasks. It was hoped that such a model could be used to predict hours worked and hence staffing requirements in changing circumstances. The number of hours worked by employees in a fault reporting centre were recorded, together with the number of faults of each type which were recorded. \nEmployees often work on a flexitime system which allows them to build up time and to leave early every second Friday. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nNumber of hours worked\n\nByDa\n\nNumber of talks of a certain type\n\nPR\n\n\n\nRWT\n\nA type of fault variable \n\nFault\n\n\n\nSOA\n\nNumber of service orders of type A \n\nSOB\n\nNumber of service orders of type B \n\nSOC\n\nNumber of service orders of type C \n\nCable\n\n\n\nField\n\nField call \n\nHot\n\nHotline \n\nREST\n\n\n\nSpec\n\n\n\nApp\n\n\n\nProb\n\n\n\nSC\n\n\n\nHO\n\n\n\nMO\n\n\n\nDay\n\nDay of the week: 1-Monday, 2-Tuesday, 3-Wednesday, 4-Thursday, 5-Friday \n", - "download": "http://www.statsci.org/data/oz/telecom.txt", - "filename": "telecom", - "name": "Telecom Work Measurement Study", + "description": "the percentage change in population for the 50 states and the District of Columbia from the 2000 census to the 2010 census. 
", + "download": "https://dasl.datadescription.com/download/data/3402", + "filename": "Population-growth-2010", + "name": "Population growth 2010", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "An industrial Taguchi experiment was performed to study the influence of several controllable factors on the mean value and the variation in the percentage of shrinkage of products made by injection moulding. For studying the variation, three noise factors were also included in the design. All factors were set at two levels. \nThe problem is a `nominal-is-best' problem where the aim is to reach a certain tartet for the percentage shrinkage, at the same time having as small as variation as possible about the target value. The design that was applied is a so-called Taguchi L8(27)-design with seven controllable factors. At each setting of the controllable factors, the noise factors were varied according to a Taguchi L4(23)-design. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nControllable Factors:\n\nCycle\n\nCycle time\n\nMould\n\nMould temperature\n\nCavity\n\nCavity thickness\n\nPressure\n\nHolding pressure\n\nSpeed\n\nInjection speed\n\nTime\n\nHolding time\n\nGate\n\nGate size\n\nNoise Factors:\n\nRegrind\n\nPercentage regrind\n\nMoisture\n\nMoisture content\n\nTemperature\n\nAmbient temperature\n\nResponse:\n\nShrinkage\n\nPercentage shrinkage\n", - "download": "http://www.statsci.org/data/general/injmould.txt", - "filename": "injmould", - "name": "Injection Moulding Shrinkage", + "description": "Crowd Management Strategies monitors accidents at rock concerts. In their database, they list the names and other variables of victims whose deaths were attributed to “crowd crush” at rock concerts. 
The data give the victims’ ages for data from a one-year period: ", + "download": "https://dasl.datadescription.com/download/data/3429", + "filename": "Rock-concert-deaths", + "name": "Rock concert deaths", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the ambient temperature and the number of primary O-rings damaged for 23 of the 24 space shuttle launches before the launch of the space shuttle Challenger on January 20, 1986. (Challenger was the 25th shuttle. One engine was lost at sea and could not be examined.) Each space shuttle contains 6 primary O-rings. \nThe forecast temperate of the launching day of the Challenger was 31 degrees F. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTemp\n\nAmbient temperature\n\nDamaged\n\nNumber of O-rings damaged\n", - "download": "http://www.statsci.org/data/general/challenger.txt", - "filename": "challenger", - "name": "Space Shuttle Challenger", + "description": "A study at a liberal arts college attempted to find out whether men and women watch the same amount of TV, on average and whether it mattered if students were varsity athletes or not. Student researchers asked 200 randomly selected students questions about their backgrounds and about their television-viewing habits and received 197 legitimate responses. The researchers found that men watch, on average, about 2.5 hours per week more TV than women, and that varsity athletes watch about 3.5 hours per week more than those who are not varsity athletes. But is this the whole story? 
To investigate further, they divided the students into four groups: male athletes (MA), male non-athletes (MNA), female\nathletes (FA), and female non-athletes (FNA).", + "download": "https://dasl.datadescription.com/download/data/3504", + "filename": "TV-watching", + "name": "TV watching", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data consist of failures of a piece of electronic equipment operating in two modes. For each operating period, Mode1 is the time spent operating in one mode and Mode2 is the time spent operating in the other. The total number of failures recorded in each period is recorded. \n \n\n\n \nVariable \n \nDescription\n \n\n\n \nMode1 \n \nTime in operating mode 1\n \nMode2 \n \nTime in operating mode 2\n \nFailures\n \nNumber of failures\n \n\n\n", - "download": "http://www.statsci.org/data/general/twomodes.txt", - "filename": "twomodes", - "name": "Failures of Electronic Equipment", + "description": "Insurance companies and other organizations use actuarial tables to estimate the remaining lifespans of their customers. 
The data file gives estimated life expectancy and additional years of life for black males in the United States, according to a 2016 National Vital Statistics Report. A regression model to predict Life expectancy from Age appears to fit well, but consider the residuals.", + "download": "https://dasl.datadescription.com/download/data/3542", + "filename": "Years-to-live", + "name": "Years to live 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Sanford Weisberg writes \nWhen gasoline is pumped into a tank, hydrocarbon vapors are forced out of a tank and into the atmosphere. To reduce this significant source of air pollution, devices are installed to capture the vapor. In testing these vapor recovery systems, the amount that escapes cannot be measured, but a \"sniffer\" can determine if some vapor is escaping. Also, the amount that is recovered can be measured. To estimate the efficiency of the system, some method of estimating the total amount given off must be used. To this end, a laboratory experiment was conducted in which the amount of vapor given off was measured under carefully controlled conditions. Four variables are relevant for modeling. In an experiment, these conditions were varied and the quantity of emitted hydrocarbons was measured in grams. 
\n\n\n \n \nVariable\n \nDescription\n\n\n\n\n \n \nTankTemp\n - \ninitial tank temperature (�F)\n\n\nGasTemp\n - \ntemperature of the dispensed gasoline (�F)\n\n\nTankPres\n - \ninitial vapor pressure in the tank (psi)\n\n\nGasPres\n - \nvapor pressure of the dispensed gasoline (psi)\n\n\nHC\n - \nemitted hydrocarbons (g)\n\n\n", - "download": "http://www.statsci.org/data/general/gasvapor.txt", - "filename": "gasvapor", - "name": "Sniffing for Hydrocarbon Vapour", + "description": "Fortune magazine collected the zodiac signs of 256 heads of the largest 400 companies. The data shows the number of births for each sign. ", + "download": "https://dasl.datadescription.com/download/data/3547", + "filename": "Zodiac", + "name": "Zodiac", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Population" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "https://en.wikipedia.org/wiki/List_of_U.S._states_by_electricity_production_from_renewable_sources", "download": "https://dasl.datadescription.com/download/data/3051", "filename": "Alternative-energy", "name": "Alternative energy 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "In a statement to a Senate Public Works Committee, a senior executive of Texaco, Inc., cited a study on the effectiveness of auto filters on reducing noise. Because of concerns about performance, two types of filters were studied, a standard silencer and a new device developed by the Associated Octel Company. Noise is in decibels/10. 
[…] ", "download": "https://dasl.datadescription.com/download/data/3058", "filename": "Auto-noise-filters", "name": "Auto noise filters", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A student experiment was run to test the performance of 4 brands of batteries under 2 different Environments (room temperature and cold). For each of the 8 treatments, 2 batteries of a particular brand were put into a flashlight. The flashlight was then turned on and allowed to run until the light went out. The […] ", "download": "https://dasl.datadescription.com/download/data/3070", "filename": "Batteries", "name": "Batteries", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Stopping distances in feet for a car tested 3 times at each of 5 speeds. We hope to create a model that predicts Stopping Distance from the Speed of the car. ", "download": "https://dasl.datadescription.com/download/data/3086", "filename": "Brakes", "name": "Brakes", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Measurements on 38 1978-79 model automobiles. Gas mileage in miles per gallon as measured by Consumers’ Union on a test track. Other values as reported by automobile manufacturer. Used to illustrate regression model building and diagnosis. Be sure to check the residuals when predicting MPG. 
", "download": "https://dasl.datadescription.com/download/data/3096", "filename": "Cars", "name": "Cars", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A start-up company has developed an improved electronic chip for use in laboratory equipment. The company needs to project the manufacturing cost, so it develops a spreadsheet model that takes into account the purchase of production equipment, overhead, raw materials, depreciation, maintenance, and other business costs. The spreadsheet estimates the cost of producing 10,000 to […] ", "download": "https://dasl.datadescription.com/download/data/3109", "filename": "Chips", "name": "Chips", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "-", "download": "https://dasl.datadescription.com/download/data/3126", "filename": "Computer-chip", "name": "Computer chip manufacturing", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Dalia collects data via smartphone from users worldwide. 
This survey asked (among many other questions) about access to cars and the use of ride-hailing apps ", "download": "https://dasl.datadescription.com/download/data/3153", "filename": "Dalia", "name": "Dalia", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Disk drive capacity is often given in terabytes (TB), where 1 TB = 1000 gigabytes, or about a trillion bytes. A search of prices for external disk drives on Amazon.com in mid-2016 found the data on capacity and price. ", "download": "https://dasl.datadescription.com/download/data/3167", "filename": "Disk-drives", "name": "Disk drives 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Most water tanks have a drain plug so that the tank may be emptied when it’s to be moved or repaired. How long it takes a certain size of tank to drain depends on the size of the plug, as shown in the table. ", "download": "https://dasl.datadescription.com/download/data/3175", "filename": "Down-the-Drain", "name": "Down the Drain", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A university teacher saved every e-mail received from students in a large introductory statistics class during one term. He then counted, for each student who had sent him at least one e-mail, how many e-mails each student had sent. What is the distribution of e-mail communications? 
", "download": "https://dasl.datadescription.com/download/data/3181", "filename": "E-mails", "name": "E-mails", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Fuel economy (mpg) and the number of cylinders in a sample of cars. Data extracted from a larger cars dataset. ", "download": "https://dasl.datadescription.com/download/data/3226", "filename": "Fuel-economy-and-cylinders", "name": "Fuel economy and cylinders", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "An experiment to test a new gasoline additive, Gasplus, was performed on three different cars: a sports car, a minivan, and a hybrid. 
Each car was tested with both Gasplus and regular gas on 10 different occasions and their gas mileage was recorded.", "download": "https://dasl.datadescription.com/download/data/3231", "filename": "Gas-additives", "name": "Gas additives", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Internet users 2014", "download": "https://dasl.datadescription.com/download/data/3299", "filename": "Internet-users", "name": "Internet users 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "iPod failures", "download": "https://dasl.datadescription.com/download/data/3300", "filename": "iPod-failures", "name": "iPod failures", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Richard DeVeaux owned a Nissan Maxima for 8 years. He\nrecorded the car’s fuel efficiency (in mpg) each time he filled the tank. He wanted to know what fuel efficiency to expect as “ordinary” for his car. 
Knowing this, he was able to predict when he’d need to fill the tank again and to notice if the fuel efficiency suddenly got worse, which could be a sign of trouble.", "download": "https://dasl.datadescription.com/download/data/3367", "filename": "Nissan", "name": "Nissan", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Costs of construction for 32 light water nuclear plants.", "download": "https://dasl.datadescription.com/download/data/3554", "filename": "Nuclear-plants", "name": "Nuclear plants", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Pew Research conducted a survey about social networking in several countries. They asked whether respondents had access to and used social networking. Responses were “yes” (use social networking), “no”, and “not available”.", "download": "https://dasl.datadescription.com/download/data/3457", "filename": "Social-networking", "name": "Social networking", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Cnet.com tests tablet computers and continuously updates its list. As of January 2014, the list included the battery life (in hours) and luminous intensity (i.e., screen brightness, in cd/m^2). 
", "download": "https://dasl.datadescription.com/download/data/3474", "filename": "Tablet-computers-2014", "name": "Tablet computers 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Should you generate electricity with your own personal\nwind turbine? That depends on whether you have enough\nwind on your site. To produce enough energy, your site should\nhave an annual average wind speed above 8 miles per hour, according\nto the Wind Energy Association. One candidate site was\nmonitored for a year, with wind speeds recorded every 6 hours.\nA total of 1114 readings of wind speed averaged 8.019 mph with\na standard deviation of 3.813 mph. The data are provided.", "download": "https://dasl.datadescription.com/download/data/3527", "filename": "Wind-power", "name": "Wind power", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data set gives a random sample of the length of visits of users entering the msnbc.com web site during September 28, 1999.\nThe length of the visit is an estimate of the total number of clicks or pages seen by each user and is based on web server \nlogs, thus it counts only pages recorded by the server. Pages cached in the user's browser or in a cache proxy server are \nunknown. The data set used in the paper is much larger than the one made available here but that larger data set is also \navailable in a page cited in the references. 
", - "download": "http://jse.amstat.org/datasets/msnbclength.dat.txt", - "filename": "msnbclength", - "name": "Internet Data Analysis for Undergrad Curriculum", - "number_format": 31, - "remove_quotes": true, - "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The video lottery terminal dataset contains observations on the three\nwindows of an electronic slot machine for 345 plays together with the\nprize paid out for each play. The prize payout distribution is so\nbadly skewed that confidence intervals for expected payout based on the\ncentral limit theorem are not accurate. The dataset can be used at the\ngraduate or upper undergraduate level to illustrate parametric\nbootstrapping. The dataset can also be used in a graduate course to\nillustrate tests of independence for two and three-way contingency\ntables involving random zeroes, or these tables may be collapsed and\nused as examples in an introductory course.", - "download": "http://jse.amstat.org/datasets/vlt.dat.txt", - "filename": "vlt_", - "name": "Video Lottery Terminal Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false } ], "subcategory_name": "Technology" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The Pew Research Center conducted a representative telephone survey in October of 2016. Among the reported results was the following table concerning the preferred political party affiliation of respondents and their ages for white voters. Is there evidence of age-based differences in party affiliation in the United States for white voters? 
", "download": "https://dasl.datadescription.com/download/data/3045", "filename": "Age-and-party", "name": "Age and party 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The outcome of the 2000 U.S. presidential election was determined in Florida amid much\ncontroversy. Even years later, historians continue to debate who really received the most\nvotes. The main race was between George W. Bush and Al Gore, but two minor candidates\nplayed a significant role. To the political right of the major party candidates was Pat\nBuchanan, while to the political left was Ralph Nader. Generally, Nader earned more votes\nthan Buchanan throughout the state. We would expect counties with larger vote totals to\ngive more votes to each candidate. The dataset gives Buchanan’s and Nader’s vote totals by\ncounty in the state of Florida. Plot to identify the outlier and consider what it means.", "download": "https://dasl.datadescription.com/download/data/3187", "filename": "Election-2000", "name": "Election 2000", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "-", "download": "https://dasl.datadescription.com/download/data/3201", "filename": "Female-president", "name": "Female president", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "How accurate are pollsters in predicting the outcomes of Congressional elections? 
The table shows the actual number of Democrat seats in the House of Representatives and the number predicted by the Gallup organization for nonpresidential election years in the 4 decades following World War II. ", "download": "https://dasl.datadescription.com/download/data/3564", "filename": "Polling", "name": "Polling", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For each U.S. Senator, his or her votes on whether to remove President\nClinton on each of the two articles of impeachment (plus a summary\nvariable representing each Senator's number of \"guilty\" votes) are\nprovided, as well as each Senator's values on several variables that\ncould be predictive of vote (e.g., Senator's degree of conservatism,\nhow well Clinton did in the Senator's state in the 1996 Presidential\nelection).", - "download": "http://jse.amstat.org/datasets/impeach.dat.txt", - "filename": "impeach", - "name": " U.S. Senate Votes on Clinton Removal", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data consist of the numbers of days served in office for the 43 \nPresidents of the United States as of 4 February 2004. 
", - "download": "http://jse.amstat.org/datasets/outlier.dat.txt", - "filename": "outlier", - "name": "A Dataset That Is 44% Outliers", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false } ], "subcategory_name": "Politics" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "1998 Baby data from http://www.nber.org/natality/ftp.cdc.gov/pub/Health_Statistics/NCHS/Dataset_Documentation/DVS/natality/", "download": "https://dasl.datadescription.com/download/data/3059", "filename": "Babysamp", "name": "Babysamp 98", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Births per 1000 population in the United States, starting in 1965. There has been concern that the birthrate may be declining. A good model for trends in birthrate may allow for some prediction. ", "download": "https://dasl.datadescription.com/download/data/3075", "filename": "Birthrates-2015", "name": "Birthrates 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "In a Chance magazine article (Summer 2005), Danielle Vasilescu and Howard Wainer used data from the United Nations Center for Human Settlements to investigate aspects of living conditions for several countries. 
Among the variables they looked at were the country’s per capita gross domestic product (GDP, in $) and Crowdedness, defined as the average number of persons per room living in homes there.\nVasilescu and Wainer re-express GDP to -10000/GDP. Doing that reveals an outlier that may be due to an error in the data.", "download": "https://dasl.datadescription.com/download/data/3148", "filename": "Crowdedness", "name": "Crowdedness", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "-", "download": "https://dasl.datadescription.com/download/data/3237", "filename": "GDP-DJIA", "name": "GDP and DJIA 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Data for 800 respondents in each of five countries. The variables provide demographic information (sex, age, education, marital status) and responses to questions of interest to marketers on personal finance and purchasing. ", "download": "https://dasl.datadescription.com/download/data/3242", "filename": "Global", "name": "Global", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The dataset gives profits (in $M) for 30 of the 500 largest global corporations (as measured by revenue). 
", "download": "https://dasl.datadescription.com/download/data/3243", "filename": "Global500-2014", "name": "Global500 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "In an investigation of environmental causes of disease, data were collected on the annual mortality rate (deaths per 100,000) for males in 61 large towns in England and Wales. In addition, the water hardness was recorded as the calcium concentration (parts per million, ppm) in the drinking water.", "download": "https://dasl.datadescription.com/download/data/3255", "filename": "Hard-water", "name": "Hard water", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The United Nations Development Programme (UNDP) uses the Human Development Index (HDI) in an attempt to summarize in one number the progress in health, education, and economics of a country. In 2015, the HDI was as high as 0.94 for Norway and as low as 0.35 for Niger. ", "download": "https://dasl.datadescription.com/download/data/3258", "filename": "HDI-2015", "name": "HDI 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The United Nations Development Programme (UNDP) uses the Human Development Index (HDI) in an attempt to summarize in one number the progress in health, education, and economics of a country. 
", "download": "https://dasl.datadescription.com/download/data/3259", "filename": "HDI-2016", "name": "HDI 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Life expectancy at birth in 195 countries. ", "download": "https://dasl.datadescription.com/download/data/3312", "filename": "Life-Expectancy", "name": "Life Expectancy", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Here is a table from the National Vital Statistics Report that gives the Life Expectancy for white males in the United States every decade during the 20th century (1 = 1900 to 1910, 2 = 1911 to 1920, etc.). Does a linear model relating life expectancy to decade fit? Would re-expressing either variable help?", "download": "https://dasl.datadescription.com/download/data/3313", "filename": "Life-expectancy-US", "name": "Life expectancy US", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Age at first marriage has changed over the course of the past century. In addition, the difference in the age of the husband and of the wife at first marriage has changed. Both the ages and the difference in ages can be interesting to analyze. 
", "download": "https://dasl.datadescription.com/download/data/3329", "filename": "Marriage-age-2015", "name": "Marriage age 2015", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Age at first marriage has changed over the course of the past century. In addition, the difference in the age of the husband and of the wife at first marriage has changed. Both the ages and the difference in ages can be interesting to analyze. ", "download": "https://dasl.datadescription.com/download/data/3330", "filename": "Marriage-age-2016", "name": "Marriage age 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The estimated median age at first marriage by sex from 1890 to 2017 is provided by the U.S. Census Bureau. Since 1960, marriage ages have been increasing steadily. Has the difference between men’s and women’s first marriage age changed? 
", "download": "https://dasl.datadescription.com/download/data/3331", "filename": "Marriage-age-2017", "name": "Marriage age 2017", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Source: JAMA 284 [2000]:335–341) \nNumber of Cases: 278", "download": "https://dasl.datadescription.com/download/data/3506", "filename": "Twin-Births", "name": "Twin Births", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "In January 2012, the New York Times\npublished a story called “Twin Births in the U.S., Like Never\nBefore,” in which they reported a 76 percent increase in the\nrate of twin births from 1980 to 2009. The dataset gives the number\nof twin births each year (per 1000 live births). 
Can you confirm the Times report?\nThe dataset also includes the atmospheric CO2 levels (ppm) for those years to offer an alternative predictor in case there appears to be an argument for causation.", "download": "https://dasl.datadescription.com/download/data/3505", "filename": "Twins-by-Year", "name": "Twins by Year 2014", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Working parents", "download": "https://dasl.datadescription.com/download/data/3539", "filename": "Working-parents", "name": "Working parents", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Demographics" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A statistics professor at a large university polled his students to find out what their majors were and what position they held in the family birth order. The results are summarized in the table.", "download": "https://dasl.datadescription.com/download/data/3076", "filename": "Birth-order", "name": "Birth order", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The technology committee at a school has stated that the average time spent by students per lab visit has increased and the increase supports their argument that they need to increase lab fees.\nTo substantiate this claim, the committee randomly sampled 12 student lab visits and noted the amount of time spent using the computer. 
The times in minutes are given:", "download": "https://dasl.datadescription.com/download/data/3127", "filename": "Computer-lab", "name": "Computer lab fees", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Students in two basic Spanish classes were required to learn 50 new vocabulary words. One group of 45 students received the list on Monday and studied the words all week. Statistics summarizing this group’s scores on Friday’s quiz are given. The other group of 25 students did not get the vocabulary list until Thursday. They also took the quiz on Friday, after “cramming” Thursday night. Then, when they returned to class the following Monday, they were retested—without advance warning. Both sets of test scores for these students are given.", "download": "https://dasl.datadescription.com/download/data/3140", "filename": "Cramming", "name": "Cramming", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "-", "download": "https://dasl.datadescription.com/download/data/3184", "filename": "Education-by-age", "name": "Education by age", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Is college worth the expense? Which colleges have graduates who earn the most? And what is the best predictor of earnings 5-years out? The data provide several possible predictors and background information suitable for building regression models. 
", "download": "https://dasl.datadescription.com/download/data/3249", "filename": "Graduate-Earnings", "name": "Graduate Earnings", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The National Center for Education Statistics reports average mathematics achievement scores for eighth graders in all 50 states.", "download": "https://dasl.datadescription.com/download/data/3332", "filename": "Math-scores-2013", "name": "Math scores 2013", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Scores on SAT tests for 162 students at the same school. (The identity of the school is not provided for privacy.) How are Math and Verbal scores related? Would a regression model be appropriate? Is there a difference in male and female scores? How would that difference be modeled? ", "download": "https://dasl.datadescription.com/download/data/3438", "filename": "SAT-scores", "name": "SAT scores", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A school district superintendent wants to test a new method of teaching arithmetic in the fourth grade at his 15 schools. He plans to select 8 students from each school to take part in the experiment, but to make sure they are roughly of the same ability, he first gives a test to all 120 students. 
The data hold the scores of the test by school.", "download": "https://dasl.datadescription.com/download/data/3439", "filename": "School-system", "name": "School system", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The dataset contains data from a class survey ", "download": "https://dasl.datadescription.com/download/data/3465", "filename": "Student-survey", "name": "Student survey", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Researchers randomly assigned subjects to take one of two tests (form A or form B) either electronically or with pencil and paper. Subjects then took the other test using the other method. The two forms had been designed to be equivalent in difficulty, but nevertheless, that equivalence was checked as part of the experiment. 
Our concern is whether subjects did equally well with each testing method.", "download": "https://dasl.datadescription.com/download/data/3466", "filename": "Student-testing", "name": "Student testing", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Summer school", "download": "https://dasl.datadescription.com/download/data/3469", "filename": "Summer-school", "name": "Summer school", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Tuition 2016", "download": "https://dasl.datadescription.com/download/data/3502", "filename": "Tuition-2016", "name": "Tuition 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "https://collegescorecard.ed.gov/data/", "download": "https://dasl.datadescription.com/download/data/3503", "filename": "Tuition-All-Schools", "name": "Tuition All Schools 2016", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the mean ACT composite scores for all 450 Wisconsin public schools in 2015 along with the type of school and number of students.", "download": "https://dasl.datadescription.com/download/data/3533", "filename": "Wisconsin-ACT-2015", "name": "Wisconsin ACT 2015", 
"number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Wisconsin ACT math", "download": "https://dasl.datadescription.com/download/data/3534", "filename": "Wisconsin-ACT-math", "name": "Wisconsin ACT math", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset contains variables that address the relationship between \npublic school expenditures and academic performance, as measured by the \nSAT. \n\nVARIABLE DESCRIPTIONS: \nColumns\n 1 - 16 Name of state (in quotation marks)\n18 - 22 Current expenditure per pupil in average daily attendance \n in public elementary and secondary schools, 1994-95 \n (in thousands of dollars)\n24 - 27 Average pupil/teacher ratio in public elementary and \n secondary schools, Fall 1994\n29 - 34 Estimated average annual salary of teachers in public \n elementary and secondary schools, 1994-95 (in thousands of \n dollars)\n36 - 37 Percentage of all eligible students taking the SAT, 1994-95\n39 - 41 Average verbal SAT score, 1994-95\n43 - 45 Average math SAT score, 1994-95\n47 - 50 Average total score on the SAT, 1994-95 ", - "download": "http://jse.amstat.org/datasets/sat.dat.txt", - "filename": "sat", - "name": "Getting What You Pay For: The Debate Over Equity in Public School Expenditures ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data are 
from the 1995 U.S. News report on American colleges and\nuniversities. They include demographic information on tuition,\nroom & board costs, SAT or ACT scores, application/acceptance\nrates, student/faculty ratio, graduation rate, and more. The\ndataset is used for the 1995 Data Analysis Exposition, sponsored\nby the Statistical Graphics Section of the American Statistical\nAssociation. See the file colleges.txt for more information on \nthe Exposition.", - "download": "http://jse.amstat.org/datasets/usnews.dat.txt", - "filename": "usnews", - "name": "U.S. News College data", - "number_format": 31, - "remove_quotes": true, - "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false } ], "subcategory_name": "Education" } ] }, { "category_name": "Physics", "subcategories": [ { "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Why does the moon appear to be so much larger when it is near the horizon than when it is directly overhead? This question has produced a wide variety of theories from psychologists. An important early hypothesis was put forth by Holway and Boring (1940) who suggested that the illusion was due to the fact that when the moon was on the horizon, the observer looked straight at it with eyes level, whereas when it was at its zenith, the observer had to elevate his or her eyes as well as his or her head to see it. To test this hypothesis, Kaufman and Rock (1962) devised an apparatus that allowed them to present two artificial moons, one at the horizon and one at the zenith, and to control whether the subjects elevated their eyes or kept them level to see the zenith moon. The horizon, or comparison, moon was always viewed with eyes level. Subjects were asked to adjust the variable horizon moon to match the size of the zenith moon or vice versa. 
For each subject the ratio of the perceived size of the horizon moon to the perceived size of the zenith moon was recorded with eyes elevated and with eyes level. A ratio of 1.00 would represent no illusion. If Holway and Boring were correct, there should be a greater illusion in the eyes-elevated condition than in the eyes-level condition.", - "download": "http://www.statsci.org/data/general/moon.txt", - "filename": "moon", - "name": "The Moon Illusion", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the magnitudes of a variable star at midnight on 600 consecutive nights. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nStar\n\nMagnitude on each night\n", - "download": "http://www.statsci.org/data/general/star.txt", - "filename": "star", - "name": "Magnitudes of a Variable Star", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data gives a sequence of observations on the magnitude of a variable Cepheid star made from the Mount Stromlo Observatory near Canberra in Australia. The observations were made as part of the MACHO project. \nThe MACHO project monitors millions of stars every night with a dedicated telescope at Mount Stromlo Observatory. The collaboration is probing the halo of our galaxy in order to detect dark matter in the form of Massive Compact Halo Objects -- MACHOs. These are astronomical bodies that emit negligible visible light, such as dwarf or neutron stars, large planets, and black holes. 
Detection of a MACHO is achieved by observing its gravitational lensing effect on a chance background star as the MACHO crosses near the line of sight between the observer and this star. In order to detect a sufficiently large number of MACHOs, the project collects observations on an large number of distant stars over an extended period of time. Data are being collected daily over a 4-year period (weather permitting), on approximately 8 million stars in the Large Magellanic Cloud (LMC) and the bulge of the Milky Way. \nThis database is a valuable resource for many other types of astronomical research. It is the most comprehensive catalog of stars in the LMC and contains stars much dimmer than those covered by previous surveys. Temporal coverage is unusually long compared to most star surveys, which permits a comprehensive study of star variability, including long periods and transient phenomena. About 40,000 variable stars have been observed in the LMC and a similar number in the galactic bulge. \nVariable stars are stars for which the intensity of the emitted energy changes over time; for periodic variable stars the change of intensity is periodic over time. Common types of periodic variable stars include eclipsing binaries, RR Lyraes, and Cepheids. Cepheids are very bright stars with periods of 1-70 days. The light curve has an asymmetric shape, and rises more rapidly than it falls. Cepheids with periods of about 1 week tend to have a bump in the descending part of the curve. For periods of about 10 days, the bump is at the peak of the curve, and for longer periods it is on the rising part of the curve. The brightness changes are caused by periodic pulsation (contraction and expansion) of the stars and their outer layers. \nThere are numerous additional types of variable stars, and each of the categories above contains subcategories. For example, Beat Cepheids and Beat RR Lyrae oscillate at more than one frequency. 
Different classes of variable stars can be located in different regions of a plot of magnitude versus temperature or spectral type. For example, RR Lyrae and Cepheids lie on a strip called the \"instability strip.\" Different types of variable curves are classified also on the basis of the shapes of their light curves and the relationships of shapes to period, for example. As well as being important for studies of stellar structure and evolution, these classes are used to determine distances on a cosmic scale by means of the relationship of their periods to their magnitudes. \nObservations of these stars are typically made at rather irregular times, depending on observation schedules and sky conditions. Different observations have differing errors. All this makes determination of the periods and the shapes of the corresponding phased light curves an interesting statistical problem. \nThis particular star is a category 1 Cepheid (magnitude -9.166) at coordinates (1541.5,1395.1). Magnitudes were recorded in the blue band from 4500 to 6300 Angstroms. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nMagnitude\n\nDifferential Magnitude\n\nSD\n\nStandard deviation of the magnitude observation. A value of -99 indicates missing and negative value indicate unreliable observations.\n", - "download": "http://www.statsci.org/data/oz/ceph1.txt", - "filename": "ceph1", - "name": "Magnitudes of Variable Star Cepheid 1", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data gives observations on the magnitude of a Cepheid variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. 
Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is Cepheid star number 2. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Anstroms\n\nRedSD\n\nStandard deviation of the red-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\n\n\n", - "download": "http://www.statsci.org/data/oz/ceph2.txt", - "filename": "ceph2", - "name": "Magnitudes of Variable Star Cepheid 2", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data gives a sequence of observations on the magnitude of an eclipsing binary variable star made from the Mount Stromlo Observatory near Canberra in Australia. The observations were made as part of the MACHO project. \nThe MACHO project monitors millions of stars every night with a dedicated telescope at Mount Stromlo Observatory. The collaboration is probing the halo of our galaxy in order to detect dark matter in the form of Massive Compact Halo Objects -- MACHOs. These are astronomical bodies that emit negligible visible light, such as dwarf or neutron stars, large planets, and black holes. Detection of a MACHO is achieved by observing its gravitational lensing effect on a chance background star as the MACHO crosses near the line of sight between the observer and this star. 
In order to detect a sufficiently large number of MACHOs, the project collects observations on an large number of distant stars over an extended period of time. Data are being collected daily over a 4-year period (weather permitting), on approximately 8 million stars in the Large Magellanic Cloud (LMC) and the bulge of the Milky Way. \nThis database is a valuable resource for many other types of astronomical research. It is the most comprehensive catalog of stars in the LMC and contains stars much dimmer than those covered by previous surveys. Temporal coverage is unusually long compared to most star surveys, which permits a comprehensive study of star variability, including long periods and transient phenomena. About 40,000 variable stars have been observed in the LMC and a similar number in the galactic bulge. \nVariable stars are stars for which the intensity of the emitted energy changes over time; for periodic variable stars the change of intensity is periodic over time. Common types of periodic variable stars include eclipsing binaries, RR Lyraes, and Cepheids. Eclipsing binaries consist of two stars orbiting each other in a conformation relative to the observer such that brightness variability occurs as one star passes in front of the other in turn; as the stars may be of different brightness, the drop in light flux depends on which star is in the front. These stars have periods of between 3 hours and 24 years, although 0.5 to 10 days is the most common range. \nThere are numerous additional types of variable stars, and each of the categories above contains subcategories. For example, Beat Cepheids and Beat RR Lyrae oscillate at more than one frequency. Different classes of variable stars can be located in different regions of a plot of magnitude versus temperature or spectral type. 
For example, RR Lyrae and Cepheids lie on a strip called the \"instability strip.\" Different types of variable curves are classified also on the basis of the shapes of their light curves and the relationships of shapes to period, for example. As well as being important for studies of stellar structure and evolution, these classes are used to determine distances on a cosmic scale by means of the relationship of their periods to their magnitudes. \nObservations of these stars are typically made at rather irregular times, depending on observation schedules and sky conditions. Different observations have differing errors. All this makes determination of the periods and the shapes of the corresponding phased light curves an interesting statistical problem. \nThis particular star is a category 1 eclipsing binary (magnitude -10.26) at coordinates (1617.8, 669.35). Magnitudes were recorded in the blue band from 4500 to 6300 Angstroms. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nMagnitude\n\nDifferential Magnitude\n\nSD\n\nStandard deviation of the magnitude observation. A value of -99 indicates missing and negative value indicate unreliable observations.\n", - "download": "http://www.statsci.org/data/oz/ecbi1041.txt", - "filename": "ecbi1041", - "name": "Magnitudes of an Eclipsing Binary Variable Star", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. 
The observations were made as part of the MACHO project. This particular star is RRab Lyrae star number 1061, a category 1 star with an asymmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Anstroms\n\nRedSD\n\nStandard deviation of the red-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n", - "download": "http://www.statsci.org/data/oz/rrl1061.txt", - "filename": "rrl1061", - "name": "Magnitudes of Variable Star RR Lyrae 1061", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRc Lyrae star number 1198, a category 1 star with a symmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Anstroms\n\nRedSD\n\nStandard deviation of the red-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n", - "download": "http://www.statsci.org/data/oz/rrl1198.txt", - "filename": "rrl1198", - "name": "Magnitudes of Variable Star RR Lyrae 1198", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRc Lyrae star number 1263, a category 1 star with a symmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Anstroms\n\nRedSD\n\nStandard deviation of the red-band observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n\n\n\n", - "download": "http://www.statsci.org/data/oz/rrl1263.txt", - "filename": "rrl1263", - "name": "Magnitudes of Variable Star RR Lyrae 1263", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "On August 24, 2006, the International Astronomical Union voted that Pluto is not a planet. Some members of the public have been reluctant to accept that decision. The data show the average distance of each of the traditional nine planets from the sun. Is there a pattern? Does Pluto fit with the other “official” planets? ", "download": "https://dasl.datadescription.com/download/data/3397", "filename": "Planets", "name": "Planets", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "On August 24, 2006, the International Astronomical Union voted that Pluto is not a planet. Some members of the public have been reluctant to accept that decision. The data show a variety of facts about the 8 planets and Pluto. Exercises consider two models for the planets. Does Pluto behave like a planet? 
", "download": "https://dasl.datadescription.com/download/data/3398", "filename": "Planets-more", "name": "Planets more data", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Astronomy" }, { "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data are the result of a study involving the analysis of performance degradation data from accelerated tests. The response variable is dialectric breakdown strength in kilo-volts, and the predictor variables are time in weeks and temperature in degrees Celcius. The study can be viewed as an 8 by 4 factorial experiment. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nStrength\n\nDialectric breakdown strength in kilovolts\n\nTime\n\nDuration of testing in weeks (8 levels)\n\nTemperature\n\nTemperature in degrees Celsius (4 levels)\n", - "download": "http://www.statsci.org/data/general/dialectr.txt", - "filename": "dialectr", - "name": "Dialectric Breakdown Strength", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - } - ], - "subcategory_name": "Electronics" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give time series measurements on waves emanating from a cylinder suspended in a tank of water. The waves are believed to show a high frequency vibration, which is an artifact of the experiment equipment, as well as lower frequency vibration which reflects forces acting on the cylinder. It is of interest to identify and to filter out the high frequency vibration. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nWaves\n\nRelative vertical displacement at equi-spaced times\n", - "download": "http://www.statsci.org/data/general/waves.txt", - "filename": "waves_", - "name": "Forces on a Cylinder Suspended in Water", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Scientist Robert Boyle examined the relationship between the volume in which a gas is contained and the pressure in its container. He used a cylindrical container with a moveable top that could be raised or lowered to change the volume. He measured the Height in inches by counting equally spaced marks on the cylinder, and", "download": "https://dasl.datadescription.com/download/data/3083", "filename": "Boyle", "name": "Boyle", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "We know from common sense and from Physics that heavier cars need more fuel, but exactly how does a car’s weight affect its fuel efficiency? The data set continues data on 38 cars including their fuel efficiency in miles per gallon measured on a track. 
", "download": "https://dasl.datadescription.com/download/data/3228", "filename": "Fuel-efficiency", "name": "Fuel efficiency", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A student experimenting with a pendulum counted the number of full swings the pendulum made in 20 seconds for various lengths of string. Her data are given. ", "download": "https://dasl.datadescription.com/download/data/3390", "filename": "Pendulum", "name": "Pendulum", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Other" } ] }, { "category_name": "Chemistry", "subcategories": [ { "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the concentrations at equi-spaced times of an intermediate compound during a chemical experiment involving a catalyst. The experiment was conducted in the Department of Chemistry at the Australian National University. The compound is producing exponentially during the first stage of the experiment and then is consumed exponentially during the second stage. Theoretically the process can be described by a compartment model, and the expected concentration of the compound over time is decribed by a second order differential equation. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nConcentration\n\nConcentration of intermediate compound\n", - "download": "http://www.statsci.org/data/oz/sargeson.txt", - "filename": "sargeson", - "name": "Chemical Experiment with Catalyst", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset is distributed with S-Plus as the dataframe ethanol. \nThe engine exhaust was analysed in an experiment in which ethanol was burned in a single cylinder automobile test engine. The response variable is NOx, the concentration of nitric oxide (NO) and nitrogen dioxide (NO2) in the engine exhaust, normalized by the work done by the engine. The explanatory variables are the compression ratio of the engine and the equivalence ratio at which the engine was run - a measure of the richness of the air/ethanol mix. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nNOx\n\nConcentration of nitric oxide (NO) and nitrogen dioxide (NO2)\n\nCompression\n\nCompression ratio\n\nEquivalence\n\nEquivalence ratio\n", - "download": "http://www.statsci.org/data/general/ethanol.txt", - "filename": "ethanol", - "name": "Exhaust from Burning Ethanol", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "-", "download": "https://dasl.datadescription.com/download/data/3112", "filename": "Chromatography", "name": "Chromatography", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A student, preparing for a triathlon, suspected that the 45 minutes each day\nshe spent training in a chlorinated pool was damaging her nail polish. She\nwished to investigate whether the color of the nail polish might make a difference.\nShe mounted acrylic nails on sticks and polished them with two different color nail polishes. She soaked them together in a chlorine solution equivalent to a swimming pool’s chlorination and then tapped them 100 times on a computer keyboard to simulate daily stress. 
The response is the % of nail chipped off as measured by scanning images of the nails and using an image processing program.", "download": "https://dasl.datadescription.com/download/data/3356", "filename": "Nail-polish", "name": "Nail polish", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "General" } ] } ], - "collection_name": "StatSci_Datasets" + "collection_name": "DASL" } diff --git a/data/datasets/DatasetCollections.json b/data/datasets/DatasetCollections.json index 619b11f91..58400dea2 100644 --- a/data/datasets/DatasetCollections.json +++ b/data/datasets/DatasetCollections.json @@ -1,14 +1,31 @@ [ + { + "name" : "DASL", + "description" : "The Data And Story Library (DASL) is brought to you by Data Description, creators of Data Desk: Data Analysis and Exploration software. DASL provides data from a wide variety of topics so that statistics teachers can find interesting, real-world examples for their students. We know a good example can make a lesson on a particular statistics method vivid and relevant. This library is designed to help teachers locate and identify datafiles for teaching as well as serve as an archive for datasets from statistics literature.", + "url" : "https://dasl.datadescription.com/" + }, + { + "name" : "JSEDataArchive", + "description" : "JSE Data Archive - data archive provided by the Journal of Statistics Education.", + "url" : "http://jse.amstat.org/jse_data_archive.htm" + }, { "name" : "Rdatasets", - "description" : "Rdatasets is a collection of over 1300 datasets that were originally distributed alongside the statistical software environment R and some of its add-on packages. The goal is to make these data more broadly accessible for teaching and statistical software development." 
+ "description" : "Rdatasets is a collection of over 1300 datasets that were originally distributed alongside the statistical software environment R and some of its add-on packages. The goal is to make these data more broadly accessible for teaching and statistical software development.", + "url" : "https://vincentarelbundock.github.io/Rdatasets/" }, { "name" : "OzDASL", - "description" : "OzDASL is a library of data sets and associated stories. It is intended as a resource for teachers of statistics in Australia and New Zealand and elsewhere. Emphasis is given to data sets with an Australasian context, although interesting data sets from anywhere in the world are included." + "description" : "OzDASL is a library of data sets and associated stories. It is intended as a resource for teachers of statistics in Australia and New Zealand and elsewhere. Emphasis is given to data sets with an Australasian context, although interesting data sets from anywhere in the world are included.", + "url" : "http://www.statsci.org/data/" }, { "name" : "Socrata", "description" : "" + }, + { + "name" : "StatLib", + "description" : "StatLib is a datasets archive maintained by Pantelis Vlachos from Carnegie Mellon University.", + "url" : "http://lib.stat.cmu.edu/datasets/" } ] diff --git a/data/datasets/JSEDataArchive.json b/data/datasets/JSEDataArchive.json new file mode 100644 index 000000000..f5a7b7fb0 --- /dev/null +++ b/data/datasets/JSEDataArchive.json @@ -0,0 +1,980 @@ +{ + "categories": [ + { + "category_name": "Medicine", + "subcategories": [ + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Time of Birth, Sex, and Birth Weight of 44 Babies", + "download": "http://jse.amstat.org/datasets/babyboom.dat.txt", + "filename": "babyboom", + "name": "babyboom", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + 
"use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This dataset contains 21 body dimension measurements as well as age, \nweight, height, and gender on 507 individuals. The 247 men and 260 \nwomen were primarily individuals in their twenties and thirties, with a \nscattering of older men and women, all exercising several hours a week.", + "download": "http://jse.amstat.org/datasets/body.dat.txt", + "filename": "Body", + "name": "Exploring Relationships in Body Dimensions", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Each record contains the results of a laboratory analysis of calcium, \ninorganic phosphorus, and alkaline phosphatase. The variable cammol \nis measured as millimoles per liter. Phosmol is inorganic phosphorus \nin millimoles per liter. Alkphos is measuring alkaline phosphatase in \ninternational units per liter. The purpose of the study was to \ndetermine if significant gender differences exist in the mean values \nof calcium, inorganic phosphorus, and alkaline phosphatase in \nsubjects over age 65. A second purpose was to determine if analytical \nvariation between laboratories would affect the mean values of the study variables. \nCalcium.dat contains incorrect records that have transcription errors. Calciumgood.dat \ncontains the corrected values. 
", + "download": "http://jse.amstat.org/datasets/calcium.dat.txt", + "filename": "Calcium", + "name": " Calcium, inorganic phosphorus and alkaline phosphatase levels in elderly patients ", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Percentage of body fat, age, weight, height, and ten body circumference\nmeasurements (e.g., abdomen) are recorded for 252 men. Body fat, a\nmeasure of health, is estimated through an underwater weighing\ntechnique. Fitting body fat to the other measurements using multiple\nregression provides a convenient way of estimating body fat for men\nusing only a scale and a measuring tape.", + "download": "http://jse.amstat.org/datasets/fat.dat.txt", + "filename": "fat", + "name": "Fitting Percentage of Body Fat to Simple Body Measurements", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Sample of 654 youths, aged 3 to 19, in the area of East Boston\nduring middle to late 1970's. Interest concerns the relationship\nbetween smoking and FEV. 
Since the study is necessarily\nobservational, statistical adjustment via regression models\nclarifies the relationship.", + "download": "http://jse.amstat.org/datasets/fev.dat.txt", + "filename": "fev_", + "name": "Forced Expiratory Volume (FEV) Data", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The tab-delimited data set gives characteristics of young female patients between\nthe ages of 11 to 26 who came to clinics of Johns Hopkins Medical Institutions between\n2006 and 2008 to begin the three-shot regimen of vaccinations with the anti-human\npapillomavirus (HPV) medication Gardasil. ", + "download": "http://jse.amstat.org/v19n1/gardasil.dat.txt", + "filename": "gardasil", + "name": "Retrospective Study (Potential Predictors for Completion or Non-Completion of ", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "From a very young age, shoes for boys tend to be wider than shoes for \ngirls. Is this because boys have wider feet, or because it is assumed that \ngirls, even in elementary school, are willing to sacrifice comfort for fashion? \nTo assess the former, a statistician measures kids' feet. Methods for analysis include \nt-tests, ANCOVA, and least-squares model building. 
This data set is useful for \ndiscussion of covariates, confounding, and conclusions in the context of the problem.", + "download": "http://jse.amstat.org/datasets/kidsfeet.dat.txt", + "filename": "kidsfeet", + "name": "Foot measurements for fourth grade children", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This article takes data from a paper in the _Journal of the American\nMedical Association_ that examined whether the true mean body\ntemperature is 98.6 degrees Fahrenheit. Because the dataset suggests\nthat the true mean is approximately 98.2, it helps students to grasp\nconcepts about true means, confidence intervals, and t-statistics.\nStudents can use a t-test to test for sex differences in body\ntemperature and regression to investigate the relationship between\ntemperature and heart rate.", + "download": "http://jse.amstat.org/datasets/normtemp.dat.txt", + "filename": "normtemp", + "name": "Normal Body Temperature, Gender, and Heart Rate ", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Common" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Drug interaction study of a new and a standard oral contraceptive \ntherapy. 
See the \"STORY BEHIND THE DATA\" and \"PEDAGOGICAL NOTES\" \nsections below for details.", + "download": "http://jse.amstat.org/datasets/ocdrug.dat.txt", + "filename": "ocdrug", + "name": "Drug Interaction", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Bacteria are cultured in medical laboratories to identify them so patients can be treated \ncorrectly. The tryptone dataset contains measurements of bacteria counts following the \nculturing of five strains of Staphylococcus aureus. There are many strains of \nStaphylococcus aureus; five were used by the experimenter. They are identified by numbers \nin the data because their names are too complicated to be useful as identifiers. The \ndataset also contains the time of incubation, temperature of incubation and concentration \nof tryptone, a nutrient. The protocols for culturing this bacteria, set the time at 24 \nhours, the temperature at 35 degrees and the tryptone concentration at 1.0%. The question \nis whether the conditions recommended in the protocols for the culturing of these strains \nare optimal. 
The task is to find the incubation time, temperature and tryptone concentration \nthat optimises the growth of this Bacterium.", + "download": "http://jse.amstat.org/datasets/Tryptone.dat.txt", + "filename": "Tryptone", + "name": "The Tryptone Task ", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Other" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Measurements of weight and tar, nicotine, and carbon monoxide content\nare given for 25 brands of domestic cigarettes.", + "download": "http://jse.amstat.org/datasets/cigarettes.dat.txt", + "filename": "Cigarette_", + "name": "Cigarette data for an introduction to multiple regression", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Smoking" + } + ] + }, + { + "category_name": "Nature", + "subcategories": [ + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The dataset consists of a few variables that may influence the demand for Beef in the United States. 
It provides an example of the influence of inflation in monetary time series data as well as providing some interesting statistical features in building demand models in regression.", + "download": "http://jse.amstat.org/v22n1/kopcso/BeefDemand.txt", + "filename": "BeefDemand", + "name": "Beef Demand", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "159 fishes of 7 species are caught and measured. Altogether there are\n8 variables. All the fishes are caught from the same lake\n(Laengelmavesi) near Tampere in Finland.", + "download": "http://jse.amstat.org/datasets/fishcatch.dat.txt", + "filename": "fishcatch", + "name": "fishcatch", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "A cost of increased reproduction in terms of reduced longevity has been\nshown for female fruitflies, but not for males. The flies used were an\noutbred stock. Sexual activity was manipulated by supplying individual\nmales with one or eight receptive virgin females per day. The\nlongevity of these males was compared with that of two control types.\nThe first control consisted of two sets of individual males kept with\none or eight newly inseminated females. Newly inseminated females will\nnot usually remate for at least two days, and thus served as a control\nfor any effect of competition with the male for food or space. 
The\nsecond control was a set of individual males kept with no females.\nThere were 25 males in each of the five groups, which were treated\nidentically in number of anaesthetizations (using CO2) and provision of\nfresh food medium.", + "download": "http://jse.amstat.org/datasets/fruitfly.dat.txt", + "filename": "fruitfly", + "name": "Sexual activity and the lifespan of male fruitflies", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Animals" + } + ] + }, + { + "category_name": "Statistics", + "subcategories": [ + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "http://jse.amstat.org/datasets/aaup.txt", + "download": "http://jse.amstat.org/datasets/aaup.dat.txt", + "filename": "AAUP", + "name": "AAUP Faculty Salary data", + "number_format": 31, + "remove_quotes": true, + "separator": ",", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The dataset bestbuy.dat contains monthly data on computer usage \n(MIPS) and total number of stores from August 1996 to July 2000. \nAdditionally, information on the planned number of stores through \nDecember 2001 is available. These data can be used to compare \ntime-series forecasting with trend and seasonality components and \ncausal forecasting based on simple linear regression. 
The simple \nlinear regression model exhibits unequal error variances, suggesting \na transformation of Y.", + "download": "http://jse.amstat.org/datasets/bestbuy.dat.txt", + "filename": "Best_Buy", + "name": " BestBuy", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The dollar amount for a monthly (January 1991 through December 2000) \nhousehold electric bill is presented as a time series. In addition, \npotential explanatory variables are included. Twelve representative \nmonthly values are provided for the average temperature, for \nheating degree days, and for cooling degree days (not for each \nmonth for each year). Additional variables give the family size \neach month and indicate when a new electric meter and new heating \nand cooling equipment was installed. To convert the billing amount \nto estimated power consumption, a tiered rate function (supplied \nin the accompanying Instructor's Manual) and the costs of \nassociated riders (provided here) must be used. Consumption \nestimates resulting from this information are supplied.\t", + "download": "http://jse.amstat.org/datasets/electricbill.dat.txt", + "filename": "electricbill", + "name": " Electric Bill Data", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data file contains information on 76 single-family homes in Eugene, Oregon during 2005. 
This dataset is suitable for a complete multiple linear regression analysis of home price data that covers many of the usual regression topics, including interaction and predictor transformations. Whereas realtors use experience and local knowledge to subjectively value a house based on its characteristics (size, amenities, location, etc.) and the prices of similar houses nearby, regression analysis can provide an alternative that more objectively models local house prices using these same data. SOURCES: The data were provided by Victoria Whitman, a realtor in Eugene, in 2005. The data were used in a case study in Pardoe (2006).", + "download": "http://jse.amstat.org/datasets/homes76.dat.txt", + "filename": "homes76", + "name": " Modeling home prices using realtor data", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": " This file contains total US gross box office receipts for 49 movies. 
This data is to accompany the article entitled Movie Data.", + "download": "http://jse.amstat.org/datasets/movietotal.dat.txt", + "filename": "movietotal", + "name": "movietotal", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "For 97 countries in the world, data are given for birth rates, death\nrates, infant death rates, life expectancies for males and females, and\nGross National Product.\n\nVARIABLE DESCRIPTIONS:\nColumns\n 1 - 6 Live birth rate per 1,000 of population\n 7 - 14 Death rate per 1,000 of population\n15 - 22 Infant deaths per 1,000 of population under 1 year old\n23 - 30 Life expectancy at birth for males\n31 - 38 Life expectancy at birth for females\n39 - 46 Gross National Product per capita in U.S. dollars \n47 - 52 Country Group\n 1 = Eastern Europe\n 2 = South America and Mexico\n 3 = Western Europe, North America, Japan, Australia, New Zealand\n 4 = Middle East\n 5 = Asia\n 6 = Africa\n53 - 74 Country", + "download": "http://jse.amstat.org/datasets/poverty.dat.txt", + "filename": "poverty", + "name": "The Statistics of Poverty and Inequality ", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Economics" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "http://jse.amstat.org/datasets/ballbearings.txt", + "download": "http://jse.amstat.org/datasets/ballbearings.dat.txt", + "filename": "ballbearings", + "name": "Ball Bearing Reliability Data", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + 
"skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "http://jse.amstat.org/datasets/baseball.txt", + "download": "http://jse.amstat.org/datasets/baseball.dat.txt", + "filename": "baseball_", + "name": "baseball", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This data set contains every NCAA Basketball Tournament game \never played. The tournament has been held every year since 1939.", + "download": "http://jse.amstat.org/datasets/basketball.dat.txt", + "filename": "Basketball", + "name": "NCAA Basketball Tournament Data", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Data are provided for Barry Bonds' plate appearances in the 2001\nbaseball season. 
Variables include characteristics of the innings\nbefore the first pitch to Bonds (e.g., the number of outs, the number\nof runners on each base, the score, the opposing pitcher's earned run\naverage) and after the first pitch to Bonds (e.g., the outcome of the\nappearance, how many runs scored in the inning after Bonds hits).", + "download": "http://jse.amstat.org/datasets/bonds2001.dat.txt", + "filename": "Bonds", + "name": "Barry Bonds' 2001 Plate Appearances", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The dataset contains the scores, opponents, and sites of the 18 Big Ten\nmen's basketball games that involved the University of Iowa in 1997.", + "download": "http://jse.amstat.org/datasets/hawks.dat.txt", + "filename": "hawks", + "name": " 1997 University of Iowa Big Ten Basketball Data", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The dataset consists of game-by-game information for the 1998 season\nfor Mark McGwire and the St. Louis Cardinals, and Sammy Sosa and the\nChicago Cubs. 
The dataset includes information on the home run hitting\nof these two players, as well as game results for the teams.", + "download": "http://jse.amstat.org/datasets/homerun.dat.txt", + "filename": "homerun", + "name": "The 1998 Home Run Race Between Mark McGwire and Sammy Sosa", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Data are from The Baseball Encyclopedia (1993) and Total Baseball (2001). \nThey include the location, league affiliation (National or American), \ndivision affiliation (East, Central, or West), season of play, home game \nattendance, runs scored, runs allowed, wins, losses, and number of games \nbehind the division leader for each major league franchise for the 1969 \nthrough 2000 seasons. Other data (including opening dates for new stadia, \nand dates of work stoppages) were collected from Ballparks by Munsey and \nSuppes (2001) and InfoPlease (2001).", + "download": "http://jse.amstat.org/datasets/MLBattend.dat.txt", + "filename": "MLBattend", + "name": "1969-2000 Major League Baseball Attendance data", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Driver results for all NASCAR races between 1975 and 2003, inclusive. 
The dataset constitutes all \nparticipants in each of 898 races, and includes their start/finish positions, prize winnings, car \nmake and laps completed.", + "download": "http://jse.amstat.org/datasets/nascard.dat.txt", + "filename": "nascard", + "name": "NASCAR Driver Results", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Race results for all NASCAR Winston Cup races between 1975 and 2003, inclusive. The dataset \nincludes the numbers of cars, total prize winnings, monthly consumer price index for the month \nof the race, track length, laps completed by the winner, spatial co-ordinates and name of track.", + "download": "http://jse.amstat.org/datasets/nascarr.dat.txt", + "filename": "nascarr", + "name": "NASCAR Race Results", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This data set contains performance statistics for National \nFootball League (NFL) teams for their 2000 regular season.\n Columns Variable Description\n 1 - 3 initials team initials\n 5 - 26 team name and location of the team\n 28 - 29 wins wins\n 31 - 32 losses losses\n 34 - 35 homedrives50 drives begun in opponents' territory\n 37 - 38 homedrives20 drives begun within 20 yards of the goal\n 40 - 41 oppdrives50 opponents drives begun in team's territory\n 43 oppdrives20 opponents drives begun within 20 yards of goal\n 45 homepuntblock punts blocked by team\n 47 opppuntblock punts team had blocked\n 49 - 50 hometouch touchdowns scored by team\n 52 - 53 opptouch touchdowns scored against team\n 55 - 58 
homeyards total yardage gained by offense\n 60 - 63 oppyards total yardage allowed by defense\n 65 - 68 hometop time of possession by offense (in minutes)\n 70 - 73 opptop time of possession by opponents' offense\n 75 - 76 homefgm field goals made\n 78 - 79 oppfgm field goals allowed to opponents\n 81 - 82 homefga field goals attempted\n 84 - 85 oppfga field goals attempted by opponents\n 87 - 89 opppuntno punts made by opponents\n 91 - 94 opppuntave average length of punts made by opponents\n 96 - 99 opppuntnet average change in field position \n during opponents' punts\n101 - 102 opppunttb opponents' punts taken for touchbacks\n104 - 105 opppunt20 opponents' punts that resulted in the team's\n offense beginning within 20 yards of their \n own (defensive) goal line\n107 - 108 opppuntlong longest opponents' punt\n110 - 112 homepuntno punts made by team\n114 - 117 homepuntave average length of punts made by team\n119 - 122 homepuntnet average change in field position \n during team's punts\n124 - 125 homepunttb team's punts taken for touchbacks\n127 - 128 homepunt20 team's punts that resulted in the opponents'\n offense beginning within 20 yards of their \n own (defensive) goal line\n130 - 131 homepuntlong longest team punt\n133 - 135 home1sts first downs obtained by offense\n137 - 139 opp1sts first downs allowed by defense\n141 - 142 homesacks sacks achieved by team's defense\n144 - 145 oppsacks sacks allowed by team's offense\n147 - 148 homekos kickoffs made by team\n150 - 151 oppkos kickoffs received by team\n153 - 156 homekoyds yards gained during kickoff returns\n158 - 161 oppkoyds yards allowed to opposition during kickoff returns\n163 - 166 homekoave average yards gained during kickoff returns\n168 - 171 oppkoave average yards allowed during kickoff returns\n173 - 175 homekolong longest kickoff return made by team\n177 - 179 oppkolong longest kickoff return allowed by team\n181 homekotds kickoffs returned for a touchdown by team\n183 oppkotds kickoffs 
returned for touchdown by opposition\n185 - 186 homerets punts returned by team\n188 - 189 opprets punts returned by opposition\n191 - 192 homefc punts \"fair caught\" by team\n194 - 195 oppfc punts \"fair caught\" by opposition\n197 - 199 homeretyds return yardage on punts by team\n201 - 203 oppretyds return yardage on punts by opposition\n205 - 208 homeretave average length of punt returns by team\n210 - 213 oppretave average length of punt returns by opposition\n215 homerettds punts returned by team for a touchdown\n217 opprettds punts returned by opponents for a touchdown\n219 - 220 homeint interceptions made by team's defense\n222 - 223 oppint interceptions made against team's offense\n225 - 226 homerecover fumbles recovered by team's defense\n228 - 229 opprecover fumbles recovered by opposing defenses\n231 - 232 numgames games played by team\n234 - 237 opprateyds average number of yards gained \n per minute of possession by opponents\n239 - 242 homerateyds average number of yards gained \n per minute of possession by team\n244 - 247 opppuntrate average number of punts \n per minute of possession by opponents\n249 - 252 homepuntrate average number of punts \n per minute of possession by team\n254 - 258 oppratetd average number of touchdowns \n per minute of possession by opponents\n260 - 264 homeratetd average number of touchdowns \n per minute of possession by team\n266 - 269 winpercent winning percentage\n271 - 275 hometorate turnovers obtained by team,\n per minute of possession by opponents\n277 - 281 opptorate turnovers allowed by team, \n per minute of possession\n283 - 286 home1rate first downs obtained by team, \n per minute of possession\n288 - 291 opp1rate first downs allowed by team's defense, \n per minute of possession by opposition\n293 - 295 homepoints points scored by team\n297 - 299 opppoints points scored against team\n301 - 303 conference conference to which the team belongs (AFC or NFC)", + "download": 
"http://jse.amstat.org/datasets/nfl2000.dat.txt", + "filename": "nfl2000", + "name": "NFL Y2K PCA", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", + "download": "http://jse.amstat.org/datasets/nfl93.dat.txt", + "filename": "nfl93", + "name": "NFL Scores and Pointspreads", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", + "download": "http://jse.amstat.org/datasets/nfl94.dat.txt", + "filename": "nfl94", + "name": "NFL Scores and Pointspreads", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. 
In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", + "download": "http://jse.amstat.org/datasets/nfl95.dat.txt", + "filename": "nfl95", + "name": "NFL Scores and Pointspreads", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", + "download": "http://jse.amstat.org/datasets/nfl96.dat.txt", + "filename": "nfl96", + "name": " NFL Scores and Pointspreads", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The dataset contains scores for all regular season National Football\nLeague games from the 1998, 1999 and 2000 seasons. 
In addition to \nthe points scored by the home and visiting teams in each game, the\ndataset contains a pointspread that handicaps each game.\n\nColumns \n 1 - 4 Year (1998, 1999, or 2000)\n 6 - 7 Week of the season (1 to 17)\n10 - 27 Home team name\n29 - 30 Home team score\n33 - 50 Visiting team name\n52 - 53 Visiting team score\n56 - 60 Pointspread ", + "download": "http://jse.amstat.org/datasets/nfl98-00.dat.txt", + "filename": "nfl98-00", + "name": " NFL Scores for 1998-2000", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": " The data set provides the weights (in lbs)\nof the 26 men on the 1996 US Olympic Rowing Team in Atlanta. The\ndata includes the names of the participants and which event they\nrowed in. The US team participated in 7 of the 8 possible events.\nThis data set is useful for discussing outliers,\nexplanations for outliers, and comparing the robustness of the\nmean and the median.\n\n", + "download": "http://jse.amstat.org/datasets/rowing.dat.txt", + "filename": "rowing", + "name": " Weights of 1996 US Olympic Rowing Team", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Data consist of 500-yard freestyle swim times for male and female swimmers age 50-94 in a biennial national competition. 
Variables include year, gender, age, age group, swim time, seed time (qualifying time from state competition), and split times (in each 50-yard segment).", + "download": "http://jse.amstat.org/v22n1/doane/SeniorSwimTimes-DataSet.txt", + "filename": "SeniorSwimTimes", + "name": "SeniorSwimTimes", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Data are provided for the 56 Tour De France bicycle races since World \nWar II. The year and dates of the event, the total number of stages, \nthe total distance, the winning total time and average speed, the name \nand country of the winner, the birth date of the winner, and the \nwinner's age at the time of victory are the variables in the dataset.", + "download": "http://jse.amstat.org/datasets/tdf.dat.txt", + "filename": "tdf", + "name": "Tour De France Winners (Can Lance Win Six?)", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Sport" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "http://jse.amstat.org/datasets/aptness.txt", + "download": "http://jse.amstat.org/datasets/aptness.dat.txt", + "filename": "Aptness", + "name": "Evaluating Aptness of a Regression Model", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "In many statistical models the normal distribution of the response is an essential 
assumption.\nThis paper uses a dataset of 2000 euro coins with information (up to the milligram) about\nthe weight of each coin. As the physical coin production process is subject to a multitude\nof (very small) variability sources, it seems reasonable to expect that the empirical\ndistribution of the weight of euro coins does agree with the normal distribution. Goodness\nof fit tests however show that this is not the case. Moreover, some outliers complicate\nthe analysis. Mixtures of normal distributions and skew normal distributions are fitted\nto the data, revealing that the normality assumption might not hold for those weights.", + "download": "http://jse.amstat.org/datasets/euroweight.dat.txt", + "filename": "euroweight", + "name": "The Weight of Euro Coins ", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Title, year of release, length in minutes, number of cast members listed, rating, and number of lines \nof description are recorded for a simple random sample of 100 movies. One can use the sample to obtain base-line information \non the movie guide from which the data were collected. The dataset also illustrates two paradoxes for associations between \nthree variables: non-transitivity of positive correlation and Simpson's paradox. SOURCE: The data were taken as a simple \nrandom sample of the approximately 19,000 movies (not including made-for-TV movies) in Leonard Maltin's Movie and Video \nGuide, 1996. 
", + "download": "http://jse.amstat.org/datasets/films.dat.txt", + "filename": "films", + "name": "films dataset", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This dataset contains descriptive data of contestants on the game show \"Friend or Foe?\". Information on the contestant's \nrace, sex, age, prize money, and playing strategy are included. ", + "download": "http://jse.amstat.org/datasets/friend_or_foe.dat.txt", + "filename": "friend_or_foe", + "name": "Data from the Television Game Show \"Friend or Foe?\"", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The dataset contains hat size as well as circumference, length of major axis \nand length of minor axis of the inner hat band for 26 hats. The manufacturer \nand the country of manufacture are also included.", + "download": "http://jse.amstat.org/datasets/hats.dat.txt", + "filename": "hats", + "name": " Hat measurements, including hat size", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The dataset consists of samples of size six taken without replacement\nfrom the integers {1, 2, 3, ..., 42}. There are actually three\ndatasets from three different sources, and in each case the six-tuples\nare (in theory) random selections or samples. 
The observations in each\nsample are given in the order in which they were obtained or selected.", + "download": "http://jse.amstat.org/datasets/lotto.dat.txt", + "filename": "lotto", + "name": "Lotto 6/42 Selections from Individuals, Irish National Lottery, and S-Plus Simulation", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This file contains daily per theater box office receipts for 49 \nmovies. This data is to accompany the article entitled Movie Data.", + "download": "http://jse.amstat.org/datasets/moviedaily.dat.txt", + "filename": "moviedaily", + "name": "moviedaily", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Every year actors and actresses are chosen to receive the Oscars awards for best actor and for best actress. 
This dataset \ncontains information about each of the winners for each of the 77 annual Oscar awards.\n\nAlthough there have been only 77 Oscars, there are 78 male winners and 78 female winners because ties happened on two \noccasions (1933 for the best actor and 1969 for the best actress).\n\nVARIABLE DESCRIPTIONS:\n\nColumns Variables\n 1 Gender (m=male f=female)\n 3-4 Oscar Year Number (1-77)\n 6-9 Year the Oscar Took Place\n 11-29 Winner’s first and last name\n 31-60 Name of the Movie in which the winner acted\n 62-63 Age of winner (at the beginning of the winning year)\n 65-77 Birth place (State if born in USA, else Country)\n 79-80 Month in which the winner was born\n 82-83 Day of month on which winner was born\n 85-88 Year the winner was born", + "download": "http://jse.amstat.org/datasets/oscars.dat.txt", + "filename": "oscars", + "name": "Oscars: Best Actors and Actresses", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This dataset contains information collected from rolling the pair of\npigs (found in the game \"Pass the Pigs\") 6000 times. A description of\nthe rules, scoring configurations, and data collection method are\nincluded in the accompanying paper.", + "download": "http://jse.amstat.org/datasets/pig.dat.txt", + "filename": "pig", + "name": "Data from the game \"Pass the Pigs\"", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "In a residential home, energy consumption is closely related to the\noutdoor temperature and size of the house. 
In a home of a given size,\ntemperature fluctuations and energy consumption vary fairly predictably\nover time. When homeowners add a room, other things being equal,\nutility usage should increase. This dataset permits students to\nestimate the energy demand and make forecasts for future months, as\nwell as explore other relationships.\n\nThe dataset contains natural gas and electricity usage data for a\ngas-heated single-family residence in the Boston area from September\n1990 through May 1997, accompanied by monthly climatological data. \nThe dataset is useful for illustrating the concepts and techniques of\ncentral tendency, dispersion, elementary time series analysis,\ncorrelation, simple and multiple regression, and variable\ntransformations.", + "download": "http://jse.amstat.org/datasets/utility.dat.txt", + "filename": "utility", + "name": "What Does It Take to Heat a New Room? ", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Other" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "http://jse.amstat.org/datasets/airport.txt", + "download": "http://jse.amstat.org/datasets/airport.dat.txt", + "filename": "US-Airport-Statistics", + "name": " US Airport Statistics", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data is a set of 50000 (1.3 MB ) observations containing roughly 2 minutes of traffic from the one hour, larger \ndec-pkt-1.tcp file used in the paper. The larger file can be accessed from the author's web page or from its source. 
With \nonly 50000 observations, the data set ", + "download": "http://jse.amstat.org/datasets/packetdata.dat.txt", + "filename": "packetdata", + "name": "packetdata", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This dataset consists of a listing of all US interstate\nhighways, treating the highway as the sampling unit.\n\nVARIABLE DESCRIPTIONS:\nInterstate # Columns 1-2\nNumber of states Columns 4-5\nApproximate miles Columns 7-10\nSouthern or Western end Columns 12-34\nNorthern or Eastern end Columns 36-57", + "download": "http://jse.amstat.org/datasets/ushighway1.dat.txt", + "filename": "ushighway1", + "name": " US INTERSTATE SYSTEM I", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This dataset consists of a listing of all US interstate\nhighways, treating the highway/state combination \nas the sampling unit. Three principal cities through which the\nhighway runs are given for each state. 
Therefore, a single highway \nmay have several observations, one observation for each state.", + "download": "http://jse.amstat.org/datasets/ushighway2.dat.txt", + "filename": "ushighway2", + "name": "US INTERSTATE SYSTEM II", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This dataset consists of a listing of all US 3-digit interstate\nhighways (spurs and connecters), treating the highway/state\ncombination as the sampling unit.", + "download": "http://jse.amstat.org/datasets/ushighway3.dat.txt", + "filename": "ushighway3", + "name": "US INTERSTATE SYSTEM III", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Travel" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "For each of the forty largest countries in the world (according to 1990\npopulation figures), data are given for the country's life expectancy\nat birth, number of people per television set, and number of people per\nphysician.", + "download": "http://jse.amstat.org/datasets/televisions.dat.txt", + "filename": "televisions", + "name": "Televisions, Physicians, and Life Expectancy", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": true + } + ], + "subcategory_name": "Population" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data set gives a random sample of the length of visits of users entering the 
msnbc.com web site during September 28, 1999.\nThe length of the visit is an estimate of the total number of clicks or pages seen by each user and is based on web server \nlogs, thus it counts only pages recorded by the server. Pages cached in the user's browser or in a cache proxy server are \nunknown. The data set used in the paper is much larger than the one made available here but that larger data set is also \navailable in a page cited in the references. ", + "download": "http://jse.amstat.org/datasets/msnbclength.dat.txt", + "filename": "msnbclength", + "name": "Internet Data Analysis for Undergrad Curriculum", + "number_format": 31, + "remove_quotes": true, + "separator": ",", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The video lottery terminal dataset contains observations on the three\nwindows of an electronic slot machine for 345 plays together with the\nprize paid out for each play. The prize payout distribution is so\nbadly skewed that confidence intervals for expected payout based on the\ncentral limit theorem are not accurate. The dataset can be used at the\ngraduate or upper undergraduate level to illustrate parametric\nbootstrapping. 
The dataset can also be used in a graduate course to\nillustrate tests of independence for two and three-way contingency\ntables involving random zeroes, or these tables may be collapsed and\nused as examples in an introductory course.", + "download": "http://jse.amstat.org/datasets/vlt.dat.txt", + "filename": "vlt_", + "name": "Video Lottery Terminal Data", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Technology" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "For each U.S. Senator, his or her votes on whether to remove President\nClinton on each of the two articles of impeachment (plus a summary\nvariable representing each Senator's number of \"guilty\" votes) are\nprovided, as well as each Senator's values on several variables that\ncould be predictive of vote (e.g., Senator's degree of conservatism,\nhow well Clinton did in the Senator's state in the 1996 Presidential\nelection).", + "download": "http://jse.amstat.org/datasets/impeach.dat.txt", + "filename": "impeach", + "name": " U.S. Senate Votes on Clinton Removal", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The data consist of the numbers of days served in office for the 43 \nPresidents of the United States as of 4 February 2004. 
", + "download": "http://jse.amstat.org/datasets/outlier.dat.txt", + "filename": "outlier", + "name": "A Dataset That Is 44% Outliers", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Politics" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This dataset contains variables that address the relationship between \npublic school expenditures and academic performance, as measured by the \nSAT. \n\nVARIABLE DESCRIPTIONS: \nColumns\n 1 - 16 Name of state (in quotation marks)\n18 - 22 Current expenditure per pupil in average daily attendance \n in public elementary and secondary schools, 1994-95 \n (in thousands of dollars)\n24 - 27 Average pupil/teacher ratio in public elementary and \n secondary schools, Fall 1994\n29 - 34 Estimated average annual salary of teachers in public \n elementary and secondary schools, 1994-95 (in thousands of \n dollars)\n36 - 37 Percentage of all eligible students taking the SAT, 1994-95\n39 - 41 Average verbal SAT score, 1994-95\n43 - 45 Average math SAT score, 1994-95\n47 - 50 Average total score on the SAT, 1994-95 ", + "download": "http://jse.amstat.org/datasets/sat.dat.txt", + "filename": "sat", + "name": "Getting What You Pay For: The Debate Over Equity in Public School Expenditures ", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Data are from the 1995 U.S. News report on American colleges and\nuniversities. 
They include demographic information on tuition,\nroom & board costs, SAT or ACT scores, application/acceptance\nrates, student/faculty ratio, graduation rate, and more. The\ndataset is used for the 1995 Data Analysis Exposition, sponsored\nby the Statistical Graphics Section of the American Statistical\nAssociation. See the file colleges.txt for more information on \nthe Exposition.", + "download": "http://jse.amstat.org/datasets/usnews.dat.txt", + "filename": "usnews", + "name": "U.S. News College data", + "number_format": 31, + "remove_quotes": true, + "separator": ",", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Education" + } + ] + } + ], + "collection_name": "JSEDataArchive" +} diff --git a/data/datasets/OzDASL.json b/data/datasets/OzDASL.json index effd5e624..7d89c8ccd 100644 --- a/data/datasets/OzDASL.json +++ b/data/datasets/OzDASL.json @@ -1,9965 +1,2425 @@ { "categories": [ { "category_name": "Medicine", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "West of Tokyo lies a large alluvial plain, dotted by a network of farming villages. Matui (1968) analysed the position of the 911 houses making up one of those villages. The area studied was a rectangle, 3 km by 4 km. A grid was superimposed over a map of the village, dividing its 12 square kilometres into 1200 plots, each 100 metres on a side. 
The numbers of houses on each of those plots are recorded in a 30 by 40 matrix of data.", "download": "http://www.statsci.org/data/general/matui.txt", "filename": "matui", "name": "Position of Houses in a Japanese Farming Village", "number_format": 31, "remove_quotes": true, "separator": "SPACE", "simplify_whitespaces": false, "skip_empty_parts": false, "use_first_row_for_vectorname": false }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Larsen and Marx (1986) write \nSince World War II, plutonium for use in atomic weapons has been produced at an Atomic Energy Commission facility in Hanford, Washington. One of the major safety problems encountered there has been the storage of radioactive wastes. Over the years, significant quantities of these substances - including strontium 90 and cesium 137 - have leaked from their open-pit storage areas into the nearby Columbia River, which flows along the Washington-Oregon border, and eventually empties into the Pacific Ocean. \nTo measure the health consequences of this contamination, an index of exposure was calculated for each of the nine Oregon counties having frontage on either the Columbia River or the Pacific Ocean. This particular index was based on several factors, including the county's stream distance from Hanford and the average distance of its population from any water frontage. As a covariate, the cancer mortality rate was determined for each of these same counties. \nThe data give the index of exposure and the cancer mortality rate during 1959-1964 for the nine Oregon counties affected. Higher index values represent higher levels of contamination. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCounty\n\nName of county\n\nExposure\n\nIndex of exposure\n\nMortality\n\nCancer mortality per 100,000 man-years\n\n\n\n", "download": "http://www.statsci.org/data/general/hanford.txt", "filename": "hanford", "name": "Cancer Mortality near Hanford Reactor", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data show the incidence of nonmelanoma skin cancer among women in Minneapolis-St Paul, Minnesota, and Dallas-Fort Worth, Texas. The towns are coded 0 for St Paul and 1 for Fort Worth. \nOne would expect sun exposure to be greater in Texas than in Minnesota.", "download": "http://www.statsci.org/data/general/skin.txt", "filename": "skin", "name": "Skin Cancer in Texas and Minnesota", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data comes from an experiment to measure the mortality of cancer cells under radiation undertaken in the Department of Radiology, University of Cape Town. Four hundred cells were placed on a dish, and three dishes were irradiated at a time, or occasion. After the cells were irradiated, the surviving cells were counted. Since cells would also die naturally, dishes with cells were put into the radiation chamber without being irradiated, to establish the natural mortality. This data gives only these zero-dose data. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nOccasion\n\nIrradiation occasion (1-27)\n\nSurvived\n\nNumber of cells surviving out of 400 placed on dish\n", "download": "http://www.statsci.org/data/general/radiatio.txt", "filename": "radiatio", "name": "Mortality of Cancer Cells", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The readabilities of 30 pamphlets about cancer are compared to the\nreading comprehension levels of 63 patients with cancer. Both\nvariables are measured in grade levels. The data are presented as\nfrequencies of occurrence over grade levels for both the pamphlet\nreadabilities and the reading levels of the patients.\n\nVARIABLE DESCRIPTIONS:\nColumns\n1 - 2 Grade level \n4 - 5 Frequency of occurrence for brochure readabilities\n7 - 8 Frequency of occurrence for patient reading levels", - "download": "http://jse.amstat.org/datasets/readability.dat.txt", - "filename": "readability", - "name": "Readability of Educational Materials for Patients with Cancer", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch14.dat contains the following 19 variables:\n\nPatient ID \nDate on study (MMDDYY)\nTreatment arm (D= daunorubicin, I= idarubicin)\nSex (M= male, F= female)\nAge (years)\nFAB classification (1 - 6)\nKarnofsky score (0 - 100) \nBaseline white blood cells (in thousands per cubic millimeter)\nBaseline platelets (in thousands per cubic millimeter)\nBaseline hemoglobin (g/dl)\nEvaluable (Y= yes, N= no)\nComplete remission (CR) (Y= yes, N= no)\nCourses of chemotherapy to 
CR\nDate of CR (MMDDYY)\nDate of last follow-up (MMDDYY)\nStatus at last follow-up (D= dead, A= alive)\nBone marrow transplant (Y= yes, N= no)\nDate of bone marrow transplant (MMDDYY)\nInclusion in June 30, 1988 analysis (Y= yes, N= no)", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch14.dat", - "filename": "Leukemia-Trial", - "name": "Interpretation of a Leukemia Trial Stopped Early", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch15.dat contains the following variables:\n\n Patient ID : Integer\n \n Institution : 0 - Memorial Sloan-Kettering,\n 1 - Mayo Clinic,\n 2 - John Hopkins.\n Group : 1 - Study,\n 0 - Control.\n\n Means of Detection : 0 - Routine Cytology,\n 1 - Routine X-ray,\n 2 - Both X-ray and Cytology,\n 3 - Interval.\n\n Cell Type : 0 - Epidermoid,\n 1 - Adenocarcinoma,\n 2 - Large Cell,\n 3 - Oat Cell,\n 4 - Other.\n Stage : 4 digits, 1st digit (1,2,3) - overall stage,\n 2nd digit (1,2,3) - tumor,\n 3rd digit (0,1,2) - lymph nodes\n 4th digit (0,1) - distant metastases\n Operated : 1 - yes,\n 0 - no.\n Survival : Integer - Days from detection to last date known alive.\n Survival Category : 0 - Alive,\n 1 - Dead of lung cancer,\n 2 - Dead of other causes.\n\n Missing values - '-'.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch15.dat", - "filename": "Lung-Cancer", - "name": "Early Lung Cancer Detection Studies", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch16a.dat contains extent of scleral extension\n(extent to which the 
tumor has invaded the sclera or \"white of the eye\")\nas coded by two raters for each of 885 eyes. There is one record for each\neye; the first field of each record contains a patient identifier, the\nsecond field contains the code for scleral extension assigned by rater A,\nand the third field contains the code for scleral extension assigned by\nrater B. The coding scheme is:\n\n1=None or innermost layers\n2=Within sclera, but does not extend to scleral surface\n3=Extends to scleral surface\n4=Extrascleral extension without transection\n5=Extrascleral extension with presumed residual tumor in the orbit\n\nThe collaborative Ocular Melanoma Study (COMS) owns the\ncopyright to this dataset; these data are considered preliminary due\nto the ongoing nature of the COMS clinical trials.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch16a.dat", - "filename": "Choroidal-Melanoma", - "name": "Modeling Interrater Agreement for Pathological Features of Choroidal Melanoma", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch16b.dat contains the degree of necrosis (tissue\ndeath) data for 3 raters. The first field contains a patient identifier,\nand the second, third, and fourth fields contain the code for degree of\nnecrosis as assigned by raters A, B, and C, respectively. 
The coding\nscheme is:\n\n1=None\n2=Less than 10% of cells\n3=Greater than or equal to 10% of cells\n\n\nThe collaborative Ocular Melanoma Study (COMS) owns the\ncopyright to this dataset; these data are considered preliminary due\nto the ongoing nature of the COMS clinical trials.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch16b.dat", - "filename": "Choroidal-Melanoma-2", - "name": "Modeling Interrater Agreement for Pathological Features of Choroidal Melanoma", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data remains the copyright of the Harris Birthright Research Unit\nof the University of Aberdeen, UK. It may be used freely for\nnon-commercial purposes and can be freely distributed provided its\nsource is acknowledged.\n\nThe file ch18a.dat contains the following individual-specific variables:\n\nVariable Coding\nControl/patient code 0=control, 1=patient\nStudy number 1-500 for each group\nNumber of smears 1-15\nBiopsy result 0=negative, 1=positive \n 9=missing (no biopsy)\t\nNumber of days from 0-840 if biopsy done, \nlast smear to biopsy -1 if no biopsy", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch18a.dat", - "filename": "Cervical-Cancer", - "name": "Modeling the Precursors of Cervical Cancer", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data remains the copyright of the Harris Birthright Research Unit\nof the University of Aberdeen, UK. 
It may be used freely for\nnon-commercial purposes and can be freely distributed provided its\nsource is acknowledged.\n\nThe file ch18a.dat contains the following smear-specific variables:\n\nVariable Coding \nControl/patient code 0=control, 1=patient \nStudy number 1-500 for each group \nSmear number 1-15 \nSmear grade 0=negative, 1=positive \nInterval in days 0-3733, 0 if 1st smear \nsince last smear", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch18b.dat", - "filename": "Cervical-Cancer\n", - "name": "Modeling the Precursors of Cervical Cancer\n", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false } ], "subcategory_name": "Oncology" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives the time between 800 successive pulses along a nerve fibre. There are 799 observations rounded to the nearest half in units of 1/50 second. 
", "download": "http://www.statsci.org/data/general/nerve.txt", "filename": "nerve", "name": "Time between Nerve Pulses", "number_format": 31, "remove_quotes": true, "separator": "auto", "simplify_whitespaces": true, "skip_empty_parts": false, "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A group of female college students took a test that measured their verbal IQs and also underwent an MRI scan to measure the size of their brains (in 1000s of pixels)", - "download": "https://dasl.datadescription.com/download/data/3084", - "filename": "Brain-size", - "name": "Brain size", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "An experiment was performed to see whether sensory deprivation over an extended period of time has any effect on the alpha-wave patterns produced by the brain. To determine this, 20 subjects, inmates in a Canadian prison, were randomly split into two groups. Members of one group were placed in solitary confinement. Those in the other […] ", - "download": "https://dasl.datadescription.com/download/data/3085", - "filename": "Brain-waves", - "name": "Brain waves", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study examined brain size (measured as pixels counted in a digitized magnetic resonance image [MRI] of a cross section of the brain) and IQ (4 performance scales of the Wechsler IQ test) for college students. 
The data give the Performance IQ scores and Brain Size. ", - "download": "https://dasl.datadescription.com/download/data/3301", - "filename": "IQ-Brain", - "name": "IQ Brain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true } ], "subcategory_name": "Neurology" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Students in an introductory statistics class (MS212 taught by Professor John Eccleston and Dr Richard Wilson at The University of Queensland) participated in a simple experiment. The students took their own pulse rate. They were then asked to flip a coin. If the coin came up heads, they were to run in place for one minute. Otherwise they sat for one minute. Then everyone took their pulse again. The pulse rates and other physiological and lifestyle data are given in the data. \nFive class groups between 1993 and 1998 participated in the experiment. The lecturer, Richard Wilson, was concerned that some students would choose the less strenuous option of sitting rather than running even if their coin came up heads, so in the years 1995-1998 a different method of random assignment was used. In these years, data forms were handed out to the class before the experiment. The forms were pre-assigned to either running or non-running and there were an equal number of each. 
In 1995 and 1998 not all of the forms were returned so the numbers running and sitting was still not entirely controlled.", "download": "http://www.statsci.org/data/oz/ms212.txt", "filename": "ms212", "name": "Pulse Rates before and after Exercise", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "A study was conducted at a major north eastern American medical centre regarding blood cholesterol levels and heart-attack incidents. A total of 28 heart-attack patients had their cholesterol levels measured two days, 4 days, and 14 days after the attack. In addition, cholesterol levels were recorded for a control group of 30 people who had not had a heart attack. The units of cholesterol measurement are not given in the original reference but are presumably mg/dL of blood.", "download": "http://www.statsci.org/data/general/cholestg.txt", "filename": "cholestg", "name": "Cholesterol Levels after Heart Attack", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "This was a pilot study for the experiment described in Recovery of Patients from Stroke. The purpose of the study was to compare four evaluation tools for assessing the recovery of patients who had recently suffered a stroke. The four tools were (1) the Goteburg Assessment Form of Hemiplegia, (2) the Bobath Assessment Form, (3) the Barthel Index and (4) the Kenny Scoring System. The Goteburg Assessment was divided into seven components measuring motor function and balance, some sensation qualities, passive range of motion and occurrence of joint pain. 
The Bobath from evaluates three areas of motor performance, postural reactions, voluntary movement, and balance and automatic protective reactions. The Barthel index and the Kenny Scoring system evaluate ability to carry out activities of daily living such as dressing, feeding, toileting etc. \nTwenty subjects were selected from two large public hospitals in Brisbane. All subjects had recently suffered a cerebrovascular accident resulting in hemiplegia lasting at least 24 hours, had not previously been incapacitated from stroke or other disease and were currently receiving occupational therapy. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject ID (1-20)\n\nSex\n\nMale (M) or female (F)\n\nSide\n\nSide of brain affected, left (L) or right (R)\n\nAge\n\nAge of subject in years\n\nLapse\n\nTime since occurrence of stroke in weeks\n\nArms\n\nArm and shoulder motor function (max 36)\n\nLegs\n\nLower limb motor function (max 30)\n\nHands\n\nWrist and hand motor function (max24)\n\nBalance\n\nBalance score (max 14)\n\nSensation\n\nSensation score (max 24)\n\nJointPain\n\nFreedom from joint pain (max 24)\n\nJointMotion\n\nPassive joint motion (max 24)\n\nBobath\n\nTotal of Bobath Assessment Form (max 266)\n\nBarthel\n\nBarthel Index (max 100)\n\nKenny\n\nKenny scoring system of dailing living (max 24)\n\n\n\n\nThe researcher chose the Barthel Index and the first five components of the Goteburg Evaluation for use in the later experiment.", "download": "www.statsci.org/data/oz/strokeass.txt", "filename": "strokeass", "name": "Evaluation Tools for Stroke Rehabilitation", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "This study compared three occupational therapy programs designed to help patients recover from the effects of a stroke. 
Eight stroke patients were assigned to each of the three treatment groups. The first group (E) was given an experimental program developed by the investigator from a model of intervention for stroke rehabilitation. The second group (F) was given a pre-existing program. The third group (G) was a non-treatment program. Each program lasted for 8 weeks. All subjects were evaluated at the start of the program and at weekly intervals until the next of the program. \nGroup E and F patients were treated in the Occupational Therapy Department of a large Brisbane repatriation hospital. Group G patients were located in the wards of a large State Hospital in Brisbane. \nThe recovery status of each subject at each time was evaluated using the Goteburg Evaluation of Hemiplegia and the Barthel Index. The Goteburg evalation form gave separate scores for three motor function variables (upper limbs, hand and wrist, lower limbs) and for balance while the Barthel Index gave a single overall score. Higher scores indicate better functional ability. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject ID\n\nGroup\n\nExperimental (E), pre-existing (F) or non-treatment (G)\n\nSex\n\nMale (M) or female (F)\n\nSide\n\nSide of brain affected, left (L) or right (R)\n\nAge\n\nAge of subject in years\n\nLapse\n\nTime lapse from stroke to start of program in weeks\n\nUE1\n\nUpper extremities score (out of 36) at week 1\n\nUE2\n\n... week 2\n\nUE3\n\n... week 3\n\nUE4\n\n... week 4\n\nUE5\n\n... week 5\n\nUE6\n\n... week 6\n\nUE7\n\n... week 7\n\nUE8\n\n... week 8\n\nHW1\n\nHand-wrist score (out of 24) at week 1\n\nHW2\n\n... week 2\n\nHW3\n\n... week 3\n\nHW4\n\n... week 4\n\nHW5\n\n... week 5\n\nHW6\n\n... week 6\n\nHW7\n\n... week 7\n\nHW8\n\n... week 8\n\nLE1\n\nLower extremities score (out of 30) at week 1\n\nLE2\n\n... week 2\n\nLE3\n\n... week 3\n\nLE4\n\n... week 4\n\nLE5\n\n... week 5\n\nLE6\n\n... week 6\n\nLE7\n\n... week 7\n\nLE8\n\n... 
week 8\n\nBal1\n\nBalance score (out of 14) at week 1\n\nBal2\n\n... week 2\n\nBal3\n\n... week 3\n\nBal4\n\n... week 4\n\nBal5\n\n... week 5\n\nBal6\n\n... week 6\n\nBal7\n\n... week 7\n\nBal8\n\n... week 8\n\nBart1\n\nBarthel Index score (out of 100) at week 1\n\nBart2\n\n... week 2\n\nBart3\n\n... week 3\n\nBart4\n\n... week 4\n\nBart5\n\n... week 5\n\nBart6\n\n... week 6\n\nBart7\n\n... week 7\n\nBart8\n\n... week 8\n", "download": "http://www.statsci.org/data/oz/stroke.txt", "filename": "stroke", "name": "Recovery of Patients from Stroke", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Framingham Heart Study is one of the longest running health studies. It has followed original subjects, their children, and their grand children, looking for factors that affect cardiac health.\nThese data only include\nsubjects whose cholesterol was measured in the first exam.\nSource: “Statistical Methods in Epidemiology” by H.A.Kahn and C.T.Sempos\nSBP: Systolic blood pressure at first exam\nDBP: Diastolic blood pressure at first exam\nCHOL: Serum choloesterol at first exam\nFRW : Framingham relative weight; a standardized measure of weight adjusted for sex and height\nCIG: Number of cigarettes smoked/day at first exam\nDEATH: First biannual exam missed due to death; 0=”alive at tenth biannual exam.” (This exam wasgiven in the 18th year of the study.)\nCAUSE: 0=aliv e at exam 10, 1=Coronary Heart Disease (sudden), 2=CHD (not sudden), 3=Stroke,4=Other cardiovascular disease, 5=cancer, 6=other", - "download": "https://dasl.datadescription.com/download/data/3217", - "filename": "Framingham", - "name": "Framingham", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - 
"use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "number of days spent in hospital by patients admitted to hospitals in New York during one year with a primary diagnosis of acute myocardial infarction (heart attack). Data are from public medicare records. Consider the distribution of stays. The data also include the age and sex of the patient and the diagnostic (DRG) code. \n", - "download": "https://dasl.datadescription.com/download/data/3263", - "filename": "Heart-attack-charges", - "name": "Heart attack charges", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Number of days spent in hospital by female patients admitted to hospitals in New York during one year with a primary diagnosis of acute myocardial infarction (heart attack). Data are from public medicare records. Consider the distribution of stays. 
The data also include the age of the patient", - "download": "https://dasl.datadescription.com/download/data/3264", - "filename": "Heart-attack-stays", - "name": "Heart attack stays", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A medical researcher measured the pulse rates (beats per minute) of a sample of randomly selected adults.", - "download": "https://dasl.datadescription.com/download/data/3413", - "filename": "Pulse-rates", - "name": "Pulse rates", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true } ], "subcategory_name": "Cardiology" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "How difficult is it to maintain your balance while concentrating? It is more difficult when you are older? Nine elderly (6 men and 3 women) and eight young men were subjects in an experiment. Each subject stood barefoot on a \"force platform\" and was asked to maintain a stable upright position and to react as quickly as possible to an unpredictable noise by pressing a hand held button. The noise came randomly and the subject concentrated on reacting as quickly as possible. 
The platform automatically measured how much each subject swayed in millimetres in both the forward/backward and the side-to-side directions.", "download": "http://www.statsci.org/data/general/balaconc.txt", "filename": "balaconc", "name": "Maintaining Balance while Concentrating", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data comes from a small study in Western Australia of hypertension, alcohol, and obesity. This study was partly designed to mimic a previously reported U.S. study based on a larger sample. A log-linear interaction model is a convenient and effective way of investigating associations among the three variables. A prior-posterior analysis of this 3 x 2 x 4 contingency table using prior information from the previous study (Klatsky et al., 1977) may be appropriate. The previous study reported the general conclusion that alcohol intake and obesity were significantly and independently associated with hypertension (blood pressure). Although a few summary statistics were reported, the full data were not published. One difference between the two studies was in the definition of obesity categories.\nThe data is listed as follows: the first column (Obesity) contains a numerical value representing the level of obesity (1=low, 2=average, 3=high), the second column (BP) contains a numerical indicator of the presence of hypertension (0=no, 1 =yes). The next five columns are labelled with the levels of alcoholic intake of the subjects, in drinks per day. 
These columns contain the frequency of observations that have this level of intake, for each group of obesity level and hypertension presence.", "download": "http://www.statsci.org/data/oz/alchyp.txt", "filename": "alchyp", "name": "Alcohol, Hypertension and Obesity", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "FEV (forced expiratory volume) is an index of pulmonary function that measures the volume of air expelled after one second of constant effort. The data contains determinations of FEV on 654 children ages 6-22 who were seen in the Childhood Respiratory Desease Study in 1980 in East Boston, Massachusetts. The data are part of a larger study to follow the change in pulmonary function over time in children. \nID\n - \nID number\nAge\n - \nyears\nFEV\n - \nlitres\nHeight\n - \ninches\nSex\n - \nMale or Female\nSmoker\n - \nNon = nonsmoker, Current = current smoker\n", "download": "http://www.statsci.org/data/general/fev.txt", "filename": "fev", "name": "Childhood Respiratory Disease", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the results of an study aimed at reducing the risk of HIV infection among African-American adolescents. The subjects were 14-18 year old female and male adolescents in a Southern USA city. The study compared two interventions. The treatment intervention was an 8-week Behavioural Skills Training (BST) program. The control was an single 2-hour education session about HIV and AIDS. 
The subjects completed sexual attitude and activity questionnaires before and after the intervention and at 6-month and 12-month follow-ups. The data here are for 10 subjects for each intervention although the original study was much larger. The data given here appear to have been created by Howell (1999) based on summary statistics from the original study. The dependent variable is the logarithm-transformed frequency of condom-protected sex ( log(Y+1) ). \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nBST\n\n1 = BST intervention, 0 = control\n\nPre\n\nLog-frequency of protected sex before the intervention\n\nPost\n\nLog-frequency of protected sex after the intervention\n\nFU6\n\nLog-frequency of protected sex reported at the 6 months follow-up\n\nFU6\n\nLog-frequency of protected sex reported at the 12 months follow-up\n", "download": "http://www.statsci.org/data/general/protsex.txt", "filename": "protsex", "name": "Behavioural Skills Training and Protected Sex", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "HARVEST (Hypertension and Ambulatory Recording Venetia Study) is a trial designed to assess whether ambulatory monitoring adds something to office (clinical) blood pressure in predicting the development of fixed hypertension and of cardiovascular complications in patients with borderline to mild hypertension. Ambulatory monitoring refers to the measuring of home blood pressure by an annotated device that the subject wears for 24 hours. The data give information on 1100 subjects compiled by Dr Paolo Palatini, Professor of Clinical Medicine at the University of Padua, Italy. 
\nPatients were eligible for the study if they satisfied the following criteria: \ndiastolic blood pressure (BP) between 90 and 100 mm Hg or isolated systolic hypertension (systolic BP greater than or equal to 140 mm Hg and diastolic BP less than 90 mm Hg) \nnever been treated for hypertension \naged 18 to 45 years old \nfree from other important risk factors for atherosclerosis\nThe subjects were followed for 5 years. Baseline examinations, including ECG and echocardiography, were repeated at the end of the study or upon development of hypertension, defined as BP persistently 100 mm Hg or greater or a systolic BP of 160 mm Hg or greater. Ambulatory monitoring was repeated 3 months and 5 years after the baseline evaluation.\nThe symbol C or A after the name of a variable means:\nC = clinical examination; A = ambulatory (home monitoring)\nThe last symbol of a variable name may be B, 3, 5 or E:\nB = baseline examination\n3 = 3-month examination\n5 = 5-year examination\nE = endpoint examination \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSmoke\n\nSmoking status at baseline examination:\n0 = non-smoking,\n1 = 1-5 cigarettes per day,\n2 = 6-10 cigarettes per day,\n3 = 11-20 cigarettes per day.\n\nSport\n\nSport activity at baseline examination:\n0 = only sedentary,\n1 = light activity (walking),\n2 = sports non-competitive,\n3 = sports competitiv.\n\nSBP\n\nSystolic blood pressure\n\nDBP\n\nDiastolic blood pressure\n\nHR\n\nHeart rate\n\nAge\n\nAge in years\n\nBMI\n\nBody mass index: 100 * weight (kg) / height (m)2\n\nEndPoint\n\nEndpoint status at the time the file was created:\n1 = blood pressure level hypertensive\n0 = blood pressure level not hypertensive\n\nTime\n\nTime in months from baseline examination to the date of endpoint or to May 30, 1999, whichever was earlier\n\nMale\n\nGender:\n1 = male\n0 = female\n", "download": "http://www.statsci.org/data/general/harvest.txt", "filename": "harvest", "name": "HARVEST Trial", "number_format": 31, "remove_quotes": 
true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data is a subset from the Six Cities study, a longitudinal study of the health effects of air pollution. The data contain repeated binary measures of the wheezing status (1 = yes, 0 = no) for each of 537 children from Stuebenville, Ohio, at ages 7, 8, 9 and 10 years. Also measured is whether or not the mother was a smoker during the first year of the study.", "download": "http://www.statsci.org/data/general/wheeze.txt", "filename": "wheeze", "name": "Child's Wheeze and Mother's Smoking", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The National Trachoma and Eye Health Program (1980) reports on the prevalance of otitis media (an infection that produces pus within the middle ear) in both aboriginal and non-aboriginal communities in Australia. The Program surveyed all aboriginal communities in Australia and attempted to contact all aborigines. Simultaneously, contact was made with non-aborigines usually living in the same or adjacent locations. Because of the high prevalence of infection in the aboriginal community only severe cases were classified as infected, virtually all of them suffering bursting of the ear drum and consequent scarring. It was thought that scarring could be used to identify those people who previously had had sever infections, but were not currently infected. So it was possible to classify subjects as (a) either not currently infected and no scarring, (b) currently infected or (c) not currently infected but one or more drums scarred. 
The data give the number of aborigines examined in various age intervals and the proportions classified as (a), (b) or (c). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nAge \n\nAge interval (years)\n\nExamined \n\nNumber of subjected examined\n\nNone \n\nProportion not currently infected and with no scarring\n\nCurrent \n\nProportion currently infected\n\nPast \n\nPropotion not currently infected but with one or both drums scarred\n", "download": "http://www.statsci.org/data/oz/otitis.txt", "filename": "otitis", "name": "Prevalence of Otitis Media in Aboriginal Communities", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "In a study of the effect of ticks on cattle in North Queensland, the disease status of animals exposed to the tick-borne parasite Anaplasma marginale is of some concern. A symptom of infection from this parasite, the number of red blood cells can be redu ced by up to 80% at the point of peak anaemia. The problem to be considered here concerns a way of quantifying the change in red blood cell populations during the recovery stages of the disease. \nIn a laboratory trial, cows were inoculated with the parasite and their red blood cells monitored before and after inoculation. The data collected were in the form of red cell volume distributions obtained from a Coulter counter, truncated and sorted into groups. In work as yet unpublished, McLaren et al. have addressed the problem of fitting distributions to similar data from humans suffering myelodysplastic anaemia, and McLaren (private communication) has suggested the need to develop hypothesis testing procedures for this type of data. \nThe observed counts of red cell volume from one of the cows on days 21 (Freq1) and 23 (Freq2) after inoculation are listed. 
The counts are grouped into 18 intervals of equal width of 7.2 fl. The first column (Group) lists the group number, the second (Vol) lists the truncated lower endpoint of the cell volume interval. The lower and upper truncation values for these red cell volume counts were 21.6 fl and 151.2 fl respectively. A cursory inspection of the two sets of observed frequency counts in histogram form on the logarithmic scale suggest that the red blood cell volume distribution is bimodal, at least at 21 days after inoculation. \n", "download": "http://www.statsci.org/data/oz/rbcmix.txt", "filename": "rbcmix", "name": "Red Blood Cell Volume Data for Cows", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Does blood pressure, on average, change with age. The data here are two categorical variables: Blood pressure categorized as High, Normal, Low, and Age categorized as under 30, 30-49, and over 50", - "download": "https://dasl.datadescription.com/download/data/3077", - "filename": "Blood-Pressure", - "name": "Blood Pressure", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Thirteen overweight women volunteered for a study to determine whether eating specially prepared crackers before a meal could help them lose weight. The subjects were randomly assigned to eat crackers with different types of fiber (bran fiber, gum fiber, both, and a control cracker) and cycled through several of the cracker alternatives. Unfortunately, some of the women developed uncomfortable bloating and upset stomachs. 
Researchers suspected that some of the crackers might be at fault. The study was paid for by the manufacturers of the gum fiber, who hoped this would be a new diet tool. What would you recommend to them about the prospects for marketing their new diet cracker?", - "download": "https://dasl.datadescription.com/download/data/3163", - "filename": "Diet", - "name": "Diet", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Common" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Medical researchers followed 6272 Swedish men for 30 years to see whether there\nwas any association between the amount of fish in their diet and prostate cancer. The original study actually used pairs of twins, which enabled the researchers to discern that the risk of cancer for those who never ate fish actually was substantially greater.", - "download": "https://dasl.datadescription.com/download/data/3207", - "filename": "Fish-diet", - "name": "Fish diet", + "description": "The effect of a single 600 mg dose of ascorbic acid versus a sugar placebo on the muscular endurance (as measured by repetitive grip strength trials) of fifteen male volunteers (19-23 years old) was evaluated. The study was conducted in a double-blind manner with crossover. \nThree initial maximal contractions were performed for each subject, with the greatest value indicating maximal grip strength. Muscular endurance was measured by having the subjects squeeze the dynamometer, hold the contraction for three seconds, and repeat continuously until a value of 50% maximum grip strength was achieved for three consecutive contractions. Endurance time was defined as the number of repetitions required to go from maximum grip strength to the initial 50% value. 
Subjects were given frequent positive verbal encouragement in an effort to have them complete as many repetitions as possible. ", + "download": "http://www.statsci.org/data/general/vitaminc.txt", + "filename": "vitaminc", + "name": "Effect of Vitamin C on Muscular Endurance", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A student decided to investigate just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods—washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). Her experiment consisted of one experimental factor, the washing Method, at four levels.\nShe suspected that the number of bacteria on her hands before washing might vary considerably from day to day. To help even out the effects of those changes, she generated random numbers to determine the order of the four treatments. Each morning, she washed her hands according to the treatment randomly chosen. Then she placed her right hand on a sterile media plate designed to encourage bacteria growth. She incubated each plate for 2 days at 36°C, after which she counted the bacteria colonies. She replicated this procedure 8 times for each of the four treatments.", - "download": "https://dasl.datadescription.com/download/data/3254", - "filename": "Hand-washing", - "name": "Hand washing", + "description": "Proponents of Reiki, a type of touch therapy, hypothesize that Reiki re-establishes the energy balance in areas of the body experiencing disease and discomfort, thus promoting healing, reducing pain and increasing quality of life. 
The main feature that distinguishes Reiki from other touch therapies, such as therapeutic touch, is that Reiki therapists have physical contact with the body. Participants in Reiki are fully-clothed and may be covered with a blanket if they wish. The treatment, delivered to 18 specific areas of the body, begins with the participant lying on his or her back. The hands are placed on 10 distinct locations on the head and torso. The participant is then asked to lie on his or her stomach (or side, if this is more comfortable), where the hands are placed on 8 additional distinct locations covering the back, hip area and feet. The treatment takes approximately 1.25 hours to complete. \nTreatment of cancer pain usually focuses on opioids. Since high doses of opioids frequently aggravate other common symptoms of cancer patients, it is of interest to explore non-drug treatments that may allow control of cancer pain with lower doses of opioids. This project studied whether Reiki is beneficial in the management of pain for people from the community experiencing general chronic pain, as a preliminary step in deciding whether Reiki is worth trying for cancer patients. \nThe Sample \nThe eligibility criteria were that subjects must be at least 18 years old, not receiving chemotherapy or radiotherapy, be experiencing moderate pain (at least 3 on a VAS (0-10) or 2 on a Likert scale (0-5)), have normal cognitive function, be able to speak, read and write English, and be willing to complete the study rating scales. The sample size necessary was calculated using the binomial distribution with the assumption that 50% of the study participants might be expected to benefit from treatment. The probability of a decrease in pain following treatment in 14 or more cases out of 20 by chance alone is 0.058. [VAS means \"Visual Analogue Scale\". A Likert-type item consists of a single statement, followed by a usually five or six-point choice with each choice described in words.] 
\nNotices were placed in retail establishments and community centres. Potential participants identified themselves by telephoning the research assistant at a number provided on the recruitment posters. Individuals who met the eligibility criteria and who signed a consent form were scheduled to receive a treatment by a Reiki therapist. \nTwenty people were recruited (18 women and 2 men) who ranged in age from 23 to 62 years (mean 44 years). These participants were currently experiencing pain at 55 sites. Ten participants had pain in their upper body and 4 in their lower body. The remaining 6 participants had pain in both the upper and lower parts of their body. Eight participants attributed their pain to bone and muscle problems and 5 participants to chronic illness. Three of the participants included in the chronic illness group had cancer. Six participants had been experiencing pain for 1 year or less, and 7 had been experiencing pain for more than 1 year, up through 7 years. The remaining seven had been in pain for more than 7 years, one for 48 years. \nEighteen participants had asked their physician for help with their pain, and 19 were currently using at least 1 of the following strategies to manage it: analgesic preparations, anti-inflammatory medications, exercise, massage, acupuncture, therapeutic touch, chiropractic, homeopathy, meditation, vitamins, steam, muscle relaxation techniques and Tai Chi. \nTreatment and Data Collection \nParticipants were given 1 treatment by the Reiki therapist in her office. They lay on a massage table fully clothed and, if desired, were also covered with a sheet or blanket. The lights were dimmed, and a candle was lit; soft music played in the background. The environment was consistent through all 20 treatments. A pain VAS ranging from 0 to 10 and a Likert scale ranging from 0 to 5 were completed immediately before and after the Reiki treatment. 
", + "download": "http://www.statsci.org/data/general/reiki.txt", + "filename": "reiki", + "name": "Using Reiki to Manage Pain", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The heights and weights of students in a statistics class were recorded. ", - "download": "https://dasl.datadescription.com/download/data/3265", - "filename": "Heights-weights", - "name": "Heights and weights", + "description": "The data consist of measurements (x1, x2, Age in months) on 23 babies, collected in the Faculty of Medicine at the University of Hong Kong. It would be of great medical interest to find a relationship between x1 and x2. However, any correlation between them is likely spurious because both x1 and x2 tend to increase with age. See Chris Lloyd's original mailing to the ANZStat mailing list discussion.", + "download": "http://www.statsci.org/data/general/babies.txt", + "filename": "babies", + "name": "Measurements on Babies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Canadian researcher John Coates took saliva samples in\nthe morning, twice a day for eight days, from 17 men working on a London\nmid-size trading f loor (trading a wide range of assets, with largest exposure to\nGerman interest rate futures), in June 2005, and classified each trader according\nto whether his testosterone level was high or low on that day (compared\nwith the trader’s median over the period). High testosterone days differed from\ntrader to trader, and high days differed from low days on average by 25% in\ntestosterone level. 
He also recorded the profits or losses (P&L) in pounds sterling\nof each trader during 11 am–4 pm daily.", - "download": "https://dasl.datadescription.com/download/data/3272", - "filename": "Hormones", - "name": "Hormones", + "description": "When anthropologists analyze human skeletal remains, an important piece of information is living stature. Since skeletons are commonly based on statistical methods that utilize measurements on small bones. The following data was presented in a paper in the American Journal of Physical Anthropology to validate one such method. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMetaCarp\n\nMetacarpal bone I length in cm\n\nStature\n\nStature in cm\n\n\n\n", + "download": "http://www.statsci.org/data/general/stature.txt", + "filename": "stature", + "name": "Prediction of Height from Metacarpal Bone Length", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Since the 1960s, the Centers for Disease Control and Prevention’s National Center for Health Statistics has been collecting health and nutritional information on people of all ages and backgrounds. The National Health and Nutrition Examination Survey (NHANES) of 2001–2002, measured a wide variety of variables, including body measurements, cardiovascular fitness, blood chemistry, and demographic information on more than 11,000 individuals.\nThe file holds data on the weights of 80 men between 19 and 24 years old of average height (between 5′8″ and 5′10″ tall).", - "download": "https://dasl.datadescription.com/download/data/3337", - "filename": "Mens-Weights", - "name": "Mens Weights", + "description": "CPK (creatine phosphokinase) is a enzyme contained within muscle cells which is necessary for the storage and release of energy. 
It can be released into the blood in response to vigorous exercise from damaged (leaky) muscle cells. This occurs often even in healthy athletes. \nThis study investigated the metabolic effect of cross-country skiing. Subjects were participants in a 24 hour cross-country relay. Age, weight (kg) and blood CPK concentration 12 hours into the relay were recorded.", + "download": "http://www.statsci.org/data/general/bloodcpk.txt", + "filename": "bloodcpk", + "name": "Blood CPK in Cross-Country Skiers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In 1879, A. A. Michelson made 100 determinations of the velocity\nof light in air using a modification of a method proposed by the French\nphysicist Foucault. The data are given here as reported by Stigler.\nThe measurements are derived from sets of often widely disparate\nnumbers of observations. The numbers are in km/sec, and have had\n299,000 subtracted from them. The currently accepted “true”\nvelocity of light in vacuum is 299,792.5 km/sec. Stigler has\napplied the corrections used by Michelson and reports that the\n“true” value appropriate for comparison to these measurements\nis 734.5. Each trial may be a summary of several experimental\nobservations.", - "download": "https://dasl.datadescription.com/download/data/3338", - "filename": "Michelson_", - "name": "Michelson", + "description": "Studies conducted at the University of Melbourne indicate that there may be a difference between the pain thresholds of blonds and brunettes. Men and women of various ages were divided into four categories according to hair colour: light blond, dark blond, light brunette, and dark brunette. 
The purpose of the experiment was to determine whether hair colour is related to the amount of pain produced by common types of mishaps and assorted types of trauma. Each person in the experiment was given a pain threshold score based on his or her performance in a pain sensitivity test (the higher the score, the higher the person’s pain tolerance). \n\nVariable\n\nValues\n\nHairColour\n\nLightBlond, DarkBlond, LightBrunette or DarkBrunette \n\nPain\n\nPain threshold score \n", + "download": "http://www.statsci.org/data/oz/blonds.txt", + "filename": "blonds", + "name": "Pain Thresholds of Blonds and Brunettes", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The National Health and Nutrition Examination Survey (NHANES) is a program of studies designed to assess the health and nutritional status of adults and children in the United States. The survey is unique in that it combines interviews and physical examinations. ", - "download": "https://dasl.datadescription.com/download/data/3365", - "filename": "NHANES", - "name": "NHANES", + "description": "For his MS305 data project, Michael Larner measured the weight and various physical measurements for 22 male subjects aged 16 - 30. Subjects were randomly chosen volunteers, all in reasonably good health. Subjects were requested to slightly tense each muscle being measured to ensure measurement consistency. Apart from Mass, all measurements are in cm. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMass\n\nWeight in kg\n\nFore\n\nMaximum circumference of forearm\n\nBicep\n\nMaximum circumference of bicep\n\nChest\n\nDistance around chest directly under the armpits\n\nNeck\n\nDistance around neck, approximately halfway up\n\nWaist\n\nDistance around waist, approximately trouser line\n\nThigh\n\nCircumference of thigh, measured halfway between the knee and the top of the leg\n\nCalf\n\nMaximum circumference of calf\n\nHeight\n\nHeight from top to toe\n\nShoulders\n\nDistance around shoulders, measured around the peak of the shoulder blades\n", + "download": "http://www.statsci.org/data/oz/physical.txt", + "filename": "physical", + "name": "Mass and Physical Measurements for Male Subjects", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Body temperatures of a random sample of 52 healthy adults, reported in degrees Fahrenheit. ", - "download": "https://dasl.datadescription.com/download/data/3368", - "filename": "Normal-temperature", - "name": "Normal temperature", + "description": "Larsen and Marx (1986) write \nIn folklore, the full moon is often portrayed as something sinister, a kind of evil force possessing the power to control our behaviour. Over the centuries, many prominent writers and philosophers have shared this belief. Milton, in Paradise Lost, refers to \nDemoniac frenzy, moping melancholy\nAnd moon-struck madness. \nAnd Othello, after the murder of Desdemona, laments \nIt is the very error of the moon\nShe comes more near the earth than she was wont\nAnd makes men mad. \nOn a more scholarly level, Sir William Blackstone, the renowned eighteenth century English barrister, defined a \"lunatic\" as \none who hath ... 
lost the use of his reason and who hath lucid intervals, sometimes enjoying his senses and sometimes not, and that frequently depending upon changes of the moon. \nThe data give the admission rates to the emergency room of a Virginia mental health clinic before, during and after the 12 full moons from August 1971 to July 1972. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMonth\n\nMonth of year: Aug, Sep, ... Jul\n\nMoon\n\nBefore, During or After the full moon\n\nAdmission\n\nAdmission rate (patients/day)\n\n\n\n", + "download": "http://www.statsci.org/data/general/fullmoon.txt", + "filename": "fullmoon", + "name": "Mental Hospital Admissions During Full Moons", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Obesity and exercise", - "download": "https://dasl.datadescription.com/download/data/3372", - "filename": "Obesity-and-exercise", - "name": "Obesity and exercise", + "description": "Five types of electrodes were applied to the arms of 16 subjects and the resistance measured. The experiment was designed to see whether all five electrode types performed similarly. \nAfter obtaining the results, the experimenters decided that the reason for the two large readings on subject 15 was the excessive amount of hair of those parts of the subject's arm. They concluded that this subject's data should be deleted. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nSubject number\n\nE1\n\nResistance measured by electrode type 1\n\nE2\n\nResistance measured by electrode type 2\n\nE3\n\nResistance measured by electrode type 3\n\nE4\n\nResistance measured by electrode type 4\n\nE5\n\nResistance measured by electrode type 5\n", + "download": "http://www.statsci.org/data/general/resist.txt", + "filename": "resist", + "name": "Skin Resistance", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Story: \nThe Pima Indians of southern Arizona are a unique community. Their ancestors were among the first people to cross over into the Americas some 30,000 years ago. For at least two millennia, they have lived in the Sonoran Desert near the Gila River. Known throughout history as a generous people, they have given of themselves for the past 30 years helping researchers at the National Institutes of Health study certain diseases like diabetes and obe-sity. Young Pima Indians often marry other Pimas, making them an ideal group for genetic researchers to study. Pimas also have an extremely high incidence of diabetes.\nResearchers investigating factors for increased risk of diabetes examined data on 768 adult women of Pima Indian heritage. One possible predictor is the body mass index, BMI, calculated as weight/height2, where weight is measured in kilograms and height in meters. We are interested in the relationship between BMI and the incidence of diabetes. ", - "download": "https://dasl.datadescription.com/download/data/3394", - "filename": "Pima-indians", - "name": "Pima indians", + "description": "Osteoarthritis is a mechanical degeneration of joint surfaces causing pain, swelling and loss of joint function in one or more joints. 
Physiotherapists treat the affected joints to reduce pain (VAS = visual analogue scale) and to increase the range of movement (ROM). In this study there were 10 subjects, each of whom was treated with continuous TENS (electric nerve stimulation) and short wave diathermy. Measurements were taken also after no treatment. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nSubject\n\nSubject identifier\n\nNoROM\n\nROM after no treatment\n\nNoVAS\n\nVAS after no treatment\n\nTENSROM\n\nROM after continuous TENS\n\nTENSVAS\n\nVAS after continuous TENS\n\nSWDROM\n\nROM after short wave diathermy\n\nSWDVAS\n\nVAS after short wave diathermy\n", + "download": "http://www.statsci.org/data/oz/oa.txt", + "filename": "oa", + "name": "Treatment for Osteoarthritis", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Pregnancies", - "download": "https://dasl.datadescription.com/download/data/3404", - "filename": "Pregnancies", - "name": "Pregnancies", + "description": "The balance of subjects were observed for two different surfaces and for restricted and unrestricted vision. Balance was assessed qualitatively on an ordinal 4-point scale based on observation by the experimenter. Subjects were expected to be better balanced (show less sway) when standing on the normal surface than on foam, and when their eyes were open rather than closed or when their vision was restricted by a dome. \nEqual numbers of male and female subjects were chosen. For both males and females, ten older (more than 24 years old) and ten younger subjects were selected. \nThe data is available in two formats. The first is in univariate or \"strung out form\" which is suitable for entry to Minitab or S-Plus and to most mixed model programs. 
The second is in repeated measures format which is suitable for SPSS and for most special purpose repeated measures programs. \nUnivariate format: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1 to 40\n\nSex\n\nmale or female\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight in cm\n\nWeight\n\nWeight in kg\n\nSurface\n\nnormal or foam\n\nVision\n\neyes open, eyes closed, or closed dome\n\nCTSIB\n\nQualitative measure of balance, 1 (stable) - 4 (unstable) \n\n\n\n", + "download": "http://www.statsci.org/data/oz/ctsibuni.txt", + "filename": "ctsibuni", + "name": "Effect of Surface and Vision on Balance", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Sleep Foundation (www.sleepfoundation.org) says that adults should get at least 7 hours of sleep each night. A survey of students at a small school in the northeast U.S. asked, among other things, “How much did you sleep last night?” The data are the responses. 
", - "download": "https://dasl.datadescription.com/download/data/3453", - "filename": "Sleep", - "name": "Sleep", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Time of Birth, Sex, and Birth Weight of 44 Babies", - "download": "http://jse.amstat.org/datasets/babyboom.dat.txt", - "filename": "babyboom", - "name": "babyboom", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset contains 21 body dimension measurements as well as age, \nweight, height, and gender on 507 individuals. The 247 men and 260 \nwomen were primarily individuals in their twenties and thirties, with a \nscattering of older men and women, all exercising several hours a week.", - "download": "http://jse.amstat.org/datasets/body.dat.txt", - "filename": "Body", - "name": "Exploring Relationships in Body Dimensions", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Each record contains the results of a laboratory analysis of calcium, \ninorganic phosphorous, and alkaline phosphatase. The variable cammol \nis measured as millimoles per liter. Phosmol is inorganic phosphorous \nin millimoles per liter. Alkphos is meauring alkaline phosphatase in \ninternational units per liter. 
The purpose of the study was to \ndetermine if significant gender differences exist in the mean values \nof calcium, inorganic phosphorus, and alkaline phosphatase in \nsubjects over age 65. A second purpose was to determine if analytical \nvariation between laboratoreis would affect the mean values of the study variables. \nCalcium.dat contains incorrect records that have transcription errors. Calciumgood.dat \ncontains the corrected values. ", - "download": "http://jse.amstat.org/datasets/calcium.dat.txt", - "filename": "Calcium", - "name": " Calcium, inorganic phosphorus and alkaline phosphatase levels in elderly patients ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Percentage of body fat, age, weight, height, and ten body circumference\nmeasurements (e.g., abdomen) are recorded for 252 men. Body fat, a\nmeasure of health, is estimated through an underwater weighing\ntechnique. Fitting body fat to the other measurements using multiple\nregression provides a convenient way of estimating body fat for men\nusing only a scale and a measuring tape.", - "download": "http://jse.amstat.org/datasets/fat.dat.txt", - "filename": "fat", - "name": "Fitting Percentage of Body Fat to Simple Body Measurements", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Sample of 654 youths, aged 3 to 19, in the area of East Boston\nduring middle to late 1970's. Interest concerns the relationship\nbetween smoking and FEV. 
Since the study is necessarily\nobservational, statistical adjustment via regression models\nclarifies the relationship.", - "download": "http://jse.amstat.org/datasets/fev.dat.txt", - "filename": "fev_", - "name": "Forced Expiratory Volume (FEV) Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The tab-delimited data set gives characteristics of young female patients between\nthe ages of 11 to 26 who came to clinics of Johns Hopkins Medical Institutions between\n2006 and 2008 to begin the three-shot regimen of vaccinations with the anti-human\npapillomavirus (HPV) medication Gardasil. ", - "download": "http://jse.amstat.org/v19n1/gardasil.dat.txt", - "filename": "gardasil", - "name": "Retrospective Study (Potential Predictors for Completion or Non-Completion of ", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "From a very young age, shoes for boys tend to be wider than shoes for \ngirls. Is this because boys have wider feet, or because it is assumed that \ngirls, even in elementary school, are willing to sacrifice comfort for fashion? \nTo assess the former, a statistician measures kids' feet. Methods for analysis include \nt-tests, ANCOVA, and least-squares model building. 
This data set is useful for \ndiscussion of covariates, confounding, and conclusions in the context of the problem.", - "download": "http://jse.amstat.org/datasets/kidsfeet.dat.txt", - "filename": "kidsfeet", - "name": "Foot measurements for fourth grade children", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This article takes data from a paper in the _Journal of the American\nMedical Association_ that examined whether the true mean body\ntemperature is 98.6 degrees Fahrenheit. Because the dataset suggests\nthat the true mean is approximately 98.2, it helps students to grasp\nconcepts about true means, confidence intervals, and t-statistics.\nStudents can use a t-test to test for sex differences in body\ntemperature and regression to investigate the relationship between\ntemperature and heart rate.", - "download": "http://jse.amstat.org/datasets/normtemp.dat.txt", - "filename": "normtemp", - "name": "Normal Body Temperature, Gender, and Heart Rate ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Common" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The effect of a single 600 mg dose of absorbic acid versus a sugar placebo on the muscular endurance (as measured by repetitive grip strength trials) of fifteen male volunteers (19-23 years old) was evaluated. The study was conducted in a double-blind manner with crossover. \nThree initial maximal contractions were performed for each subject, with the greatest value indicating maximal grip strength. 
Muscular endurance was measured by having the subjects squeeze the dynamometer, hold the contraction for three seconds, and repeat continuously until a value of 50% maximum grip strength was achieved for three consecutive contractions. Endurance time was defined as the number of repetitions required to go from maximum grip strength to the initial 50% value. Subjects were given frequent positive verbal encouragement in an effort to have them complete as many repetitions as possible. ", - "download": "http://www.statsci.org/data/general/vitaminc.txt", - "filename": "vitaminc", - "name": "Effect of Vitamin C on Muscular Endurance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Proponents of Reiki, a type of touch therapy, hypothesize that Reiki re-establishes the energy balance in areas of the body experiencing disease and discomfort, thus promoting healing, reducing pain and increasing quality of life. The main feature that distinguishes Reiki from other couch therapies, such as therapeutic touch, is that Reiki therapists have physical contact with the body. Participants in Reiki are fully-clothed and may be covered with a blanket if they wish. The treatment, delivered to 18 specific areas of the body, begins with the participant lying on his or her back. The hands are placed on 10 distinct locations on the head and torso. The participant is then asked to lie on his or her stomach (or side, if this is more comfortable), where the hands are placed on 8 additional distinct locations covering the back, hip area and feet. The treatment takes approximately 1.25 hours to complete. \nTreatment of cancer pain usually focuses on opioids. 
Since high doses of opioids frequently aggravate other common symptoms of cancer patients, it is of interest to explore non-drug treatments that may allow control of cancer pain with lower doses of opioids. This project studied whether Reiki is beneficial in the management of pain for people from the community experiencing general chronic pain, as a prelimary step in deciding whether Reiki is worth trying for cancer patients. \nThe Sample \nThe eligibility criteria were that subjects must be at least 18 years old, not receiving chemotherapy or radiotherapy, be experiencing moderate pain (at least 3 on a VAS (0-10) or 2 on a Likert scale (0-5)), have normal cognitive function, be able to speak, read and write English, and be willing to complete the study rating scales. The sample size necessary was calculated using the binomial distribution with the assumption that 50% of the study participants might be expected to benefit from treatment. The probability of a decrease in pain following treatment in 14 or more cases out of 20 by chance alone is 0.058. [VAS means \"Visual Analogue Scale\". A Likert-type item consists of a single statement, followed by a usually five or six-point choice with each choice described in words.] \nNotices were placed in retail establishments and community centres. Potential participans identified themselves by telephoning the research assistant at a number provided on the recruitment posters. Individuals who met the eligibility criteria and who signed a consent form were scheduled to receive a treatment by a Reiki therapist. \nTwenty People were recruited (18 women and 2 men) who ranged in age from 23 to 62 years (mean 44 years). These participants were currently experiencing pain at 55 sites. Ten participants had pain in their upper body and 4 in their lower body. The remaining 6 participants had pain in both the upper and lower parts of their body. 
Eight participants attributed their pain to bone and muscle problems and 5 participants to chronic illness. Three of the participants included in the chronic illness group had cancer. Six participants had been experiencing pain for 1 year or less, and 7 had been experiencing pain for more than 1 year, up through 7 years. The remaining seven had been in pain for more than 7 years, one for 48 years. \nEighteen participants had asked their physician for help with their pain, and 19 were currently using at least 1 of the following strategies to manage it: analgesic preparations, anti-inflammatory medications, exercise, massage, acupuncture, therapeutic touch, chiropractic, homeopathy, meditation, vitamins, steam, muscle relaxation techniques and Tai Chi. \nTreatment and Data Collection \nParticipants were given 1 treatment by the Reiki therapist in her office. They lay on a massage table fully clothed and, if desired, were also covered with a sheet or blanket. The lights were dimmed, and a candle was lit; soft music played in the background. The environment was consistent through all 20 treatments. A pain VAS ranging from 0 to 10 and a Likert scale ranging from 0 to 5 were completed immediately before and after the Reiki treatment. ", - "download": "http://www.statsci.org/data/general/reiki.txt", - "filename": "reiki", - "name": "Using Reiki to Manage Pain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data consist of measurements (x1, x2, Age in months) on 23 babies, collected in the Faculty of Medicine at the University of Hong Kong. It would be of great medical interest to find a relationship between x1 and x2. However, any correlation between them is likely spurious because both x1 and x2 tend to increase with age. 
See Chris Lloyd's original mailing to the ANZStat mailing list discussion.", - "download": "http://www.statsci.org/data/general/babies.txt", - "filename": "babies", - "name": "Measurements on Babies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "When anthropologists analyze human skeletal remains, an important piece of information is living stature. Since skeletons are commonly based on statistical methods that utilize measurements on small bones. The following data was presented in a paper in the American Journal of Physical Anthropology to validate one such method. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMetaCarp\n\nMetacarpal bone I length in cm\n\nStature\n\nStature in cm\n\n\n\n", - "download": "http://www.statsci.org/data/general/stature.txt", - "filename": "stature", - "name": "Prediction of Height from Metacarpal Bone Length", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "CPK (creatine phosphokinase) is a enzyme contained within muscle cells which is necessary for the storage and release of energy. It can be released into the blood in response to vigorous exercise from damaged (leaky) muscle cells. This occurs often even in healthy athletes. \nThis study intestigated the metabolic effect of cross-country skiing. Subjects were participants in a 24 hour cross-country relay. 
Age, weight (kg) and blood CPK concentration 12 hours into the relay were recorded.", - "download": "http://www.statsci.org/data/general/bloodcpk.txt", - "filename": "bloodcpk", - "name": "Blood CPK in Cross-Country Skiers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Studies conducted at the University of Melbourne indicate that there may be a difference between the pain thresholds of blonds and brunettes. Men and women of various ages were divided into four categories according to hair colour: light blond, dark blond, light brunette, and dark brunette. The purpose of the experiment was to determine whether hair colour is related to the amount of pain produced by common types of mishaps and assorted types of trauma. Each person in the experiment was given a pain threshold score based on his or her performance in a pain sensitivity test (the higher the score, the higher the person’s pain tolerance). \n\nVariable\n\nValues\n\nHairColour\n\nLightBlond, DarkBlond, LightBrunette or DarkBrunette \n\nPain\n\nPain theshold score \n", - "download": "http://www.statsci.org/data/oz/blonds.txt", - "filename": "blonds", - "name": "Pain Thresholds of Blonds and Brunettes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For his MS305 data project, Michael Larner measured the weight and various physical measurements for 22 male subjects aged 16 - 30. Subjects were randomly chosen volunteers, all in reasonable good health. 
Subjects were requested to slightly tense each muscle being measured to ensure measurement consistency. Apart from Mass, all measurements are in cm. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMass\n\nWeight in kg\n\nFore\n\nMaximum circumference of forearm\n\nBicep\n\nMaximum circumference of bicep\n\nChest\n\nDistance around chest directly under the armpits\n\nNeck\n\nDistance around neck, approximately halfway up\n\nWaist\n\nDistance around waist, approximately trouser line\n\nThigh\n\nCircumference of thigh, measured halfway between the knee and the top of the leg\n\nCalf\n\nMaximum circumference of calf\n\nHeight\n\nHeight from top to toe\n\nShoulders\n\nDistance around shoulders, measured around the peak of the shoulder blades\n", - "download": "http://www.statsci.org/data/oz/physical.txt", - "filename": "physical", - "name": "Mass and Physical Measurements for Male Subjects", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Larsen and Marx (1986) write \nIn folklore, the full moon is often portrayed as something sinister, a kind of evil force possessing the power to control our behaviour. Over the centuries, many prominent writers and philosophers have shared this belief. Milton, in Paradise Lost, refers to \nDemoniac frenzy, moping melancholy\nAnd moon-struck madness. \nAnd Othello, after the murder of Desdemona, laments \nIt is the very error of the moon\nShe comes more near the earth than she was want\nAnd makes men mad. \nOn a more scholarly level, Sir William Blackstone, the renowned eighteenth centure English barrister, defined a \"lunatic\" as \none who hath ... 
lost the use of his reason and who hath lucid intervals, sometimes enjoying his senses and sometimes not, and that frequently depending upon changes of the moon. \nThe data give the admission rates to the emergency room of a Virginia mental health clinic before, during and after the 12 full moons from August 1971 to July 1972. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMonth\n\nMonth of year: Aug, Sep, ... Jul\n\nMoon\n\nBefore, During or After the full moon\n\nAdmission\n\nAdmission rate (patients/day)\n\n\n\n", - "download": "http://www.statsci.org/data/general/fullmoon.txt", - "filename": "fullmoon", - "name": "Mental Hospital Admissions During Full Moons", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Five types of electrodes were applied to the arms of 16 subjects and the resistance measured. The experiment was designed to see whether all five electrode types performed similarly. \nAfter obtaining the results, the experimenters decided that the reason for the two large readings on subject 15 was the excessive amount of hair of those parts of the subject's arm. They concluded that this subject's data should be deleted. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nSubject number\n\nE1\n\nResistance measured by electrode type 1\n\nE2\n\nResistance measured by electrode type 2\n\nE3\n\nResistance measured by electrode type 3\n\nE4\n\nResistance measured by electrode type 4\n\nE5\n\nResistance measured by electrode type 5\n", - "download": "http://www.statsci.org/data/general/resist.txt", - "filename": "resist", - "name": "Skin Resistance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Osteoarthritis is a mechanical degeneration of joint surfaces causing pain, swelling and loss of joint function in one or more joints. Physiotherapists treat the affected joints to reduce pain (VAS = visual analogue scale) and to increase the range of movement (ROM). In this study there were 10 subjects, each of whom was treated with continuous TENS (electric nerve stimulation) and short wave diathermy. Measurements were taken also after no treatment. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nSubject\n\nSubject identifier\n\nNoROM\n\nROM after no treatment\n\nNoVAS\n\nVAS after no treatment\n\nTENSROM\n\nROM after continuous TENS\n\nTENSVAS\n\nVAS after continuous TENS\n\nSWDROM\n\nROM after short wave diathermy\n\nSWDVAS\n\nVAS after short wave diathermy\n", - "download": "http://www.statsci.org/data/oz/oa.txt", - "filename": "oa", - "name": "Treatment for Osteoarthritis", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The balance of subjects were observed for two different surfaces and for restricted and unrestricted vision. Balance was assessed qualitatively on an ordinal 4-point scale based on observation by the experimenter. Subjects were expected to be better balanced (show less sway) when standing on the normal surface than on foam, and when their eyes were open rather than closed or when their vision was restricted by a dome. \nEqual numbers of male and female subjects were chosen. For both males and females, ten older (more than 24 years old) and ten younger subjects were selected. \nThe data is available in two formats. The is in univariate or \"strung out form\" which is suitable for entry to Minitab or S-Plus and to most mixed model programs. The second is in repeated measures format which is suitable for SPSS and for most special purpose repeated measures programs. 
\nUnivariate format: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1 to 40\n\nSex\n\nmale or female\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight in cm\n\nWeight\n\nWeight in kg\n\nSurface\n\nnormal or foam\n\nVision\n\neyes open, eyes closed, or closed dome\n\nCTSIB\n\nQualitive measure of balance, 1 (stable) - 4 (unstable) \n\n\n\n", - "download": "http://www.statsci.org/data/oz/ctsibuni.txt", - "filename": "ctsibuni", - "name": "Effect of Surface and Vision on Balance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data related to the transport of sulfite ions from blood cells suspended in a salt solution. The chloride concentration (%) was measured over a period of about 8 minutes as a continuous curve generated from electrical potentials. The data given here were digitized from the curve at 10 second intervals. \nThe theory of ion transport suggested that the concentration asymptote exponentially, i.e., \nChloride = q1{1 - q2exp(- q3Time)} \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTime\n\nElapsed time in minutes\n\nChloride\n\nChloride concentration (%)\n", - "download": "http://www.statsci.org/data/general/chloride.txt", - "filename": "chloride", - "name": "Transport of Sulfite Ions from Blood Cells", + "description": "The data relate to the transport of sulfite ions from blood cells suspended in a salt solution. The chloride concentration (%) was measured over a period of about 8 minutes as a continuous curve generated from electrical potentials. The data given here were digitized from the curve at 10 second intervals. 
\nThe theory of ion transport suggested that the concentration should approach its asymptote exponentially, i.e., \nChloride = θ1{1 - θ2 exp(-θ3 Time)} \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTime\n\nElapsed time in minutes\n\nChloride\n\nChloride concentration (%)\n", + "download": "http://www.statsci.org/data/general/chloride.txt", + "filename": "chloride", + "name": "Transport of Sulfite Ions from Blood Cells", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data are the times, in days, that heroin addicts spend in a clinic. There are two clinics and the covariates are believed to affect the times spent in the clinic by addicts. \n \nVariable\n \nDescription\n\nClinic\n\n1 or 2\n\nStatus\n\n0 = still in clinic at end of study (censored) or 1 = departed from clinic\n\nTime\n\ndays spent in clinic\n\nPrison\n\n1 = prison record or 0 = no record\n\nDose\n\nmethadone dosage (mg/day)\n", "download": "http://www.statsci.org/data/oz/heroin.txt", "filename": "heroin", "name": "Methadone Treatment of Heroin Addicts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A pharmaceutical company tested three formulations of a pain relief medicine for migraine headache sufferers. For the experiment, 27 volunteers were selected and 9 were randomly assigned to one of three drug formulations. 
The subjects were instructed to take the drug during their next migraine headache episode and to report their pain on a scale […] ", - "download": "https://dasl.datadescription.com/download/data/3053", - "filename": "Analgesics", - "name": "Analgesics", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study compared the effectiveness of several antidepressants by examining the experiments in which they had passed the FDA requirements. Each of those experiments compared the active drug with a placebo, an inert pill given to some of the subjects. In each experiment some patients treated with the placebo had improved, a phenomenon called the […] ", - "download": "https://dasl.datadescription.com/download/data/3054", - "filename": "Antidepressants", - "name": "Antidepressants", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student investigated just how effective washing with soap is in eliminating bacteria. To do this she tested four different methods—washing with water only, washing with regular soap, washing with antibacterial soap (ABS), and spraying hands with antibacterial spray (AS) (containing 65% ethanol as an active ingredient). 
Her experiment consisted of one experimental factor, the […] ", - "download": "https://dasl.datadescription.com/download/data/3561", - "filename": "Baterial-soap", - "name": "Baterial soap", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Measurements of 250 men of various ages. The percent of a man’s body that is fat is a matter of concern for health and fitness. But the %bodyfat is difficult and expensive to measure accurately. These data offer correct %bodyfat measurements along with a variety of easier to find measures. Can you build a model ", - "download": "https://dasl.datadescription.com/download/data/30790", - "filename": "Bodyfat", - "name": "Bodyfat", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Burger King publishes full nutrition information on its menu. These data are for the foods on the menu recently. (Visit the site listed as the reference for the most current list.) ", - "download": "https://dasl.datadescription.com/download/data/3089", - "filename": "Burger-King-items", - "name": "Burger King items", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Nutritionists are concerned that people have a good breakfast. But what does that mean? 
students collected nutrition information from the nutrition labels of cereals in one supermarket. ", - "download": "https://dasl.datadescription.com/download/data/3107", - "filename": "Cereals", - "name": "Cereals", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Researchers at the University of Denver Infant Study Center wondered whether temperature might influence the age at which babies learn to crawl. Perhaps the extra clothing that babies wear in cold weather would restrict movement and delay the age at which they started crawling. Data were collected on 208 boys and 206 girls. Parents reported the month of the baby’s birth and the age (in weeks) at which their child first crawled. The table gives the average Temperature (°F) when the babies were 6 months old and average Crawling Age (in weeks) for each month of the year.", - "download": "https://dasl.datadescription.com/download/data/3143", - "filename": "Crawling", - "name": "Crawling", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Life expectancy at birth, TV’s per capita, and doctor’s per capita for countries of the world. Doctors predict life expectancy, but is that causal? TVs also predict life expectancy. 
", - "download": "https://dasl.datadescription.com/download/data/3169", - "filename": "life-expectancy", - "name": "Doctors and life expectancy", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fertility (births/woman) and Female life expectancy for 219 countries of the world. (Data is available on both variables for only 200). How is life expectancy related to fertility? Are there any outliers and, if so, what do they indicate", - "download": "https://dasl.datadescription.com/download/data/3202", - "filename": "Fertility-and-life-expectancy-2014", - "name": "Fertility and life expectancy 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Gossett says in his seminal 1908 paper: “Before I had succeeded in solving my problem analytically, I had endeavoured to do so empirically. The material used was a correlation table containing the height and left middle finger measurements of 3000 criminals, from a paper by W. R. MacDonell (Biometrika, Vol. I., p. 219).” His method was to write the 3000 finger length values on cards, shuffle them thoroughly, and the deal out 750 hands of 4 cards. For each hand he then calculated (with a mechanical calculator) the mean and standard deviation. (Note; He divided by n (= 4) and not by n-1 (= 3).) He then found values of ybar – the population mean (which he knew because he had the population; it is 11.5474) and divided each by the standard deviation. 
The resulting values formed the distribution which he then correctly described.\nThe finger measurements were originally given in mm and the heights in feet and inches. They have been converted to cm (at https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/crimtab.html). The midpoint of intervals are used where MacDonnel gives a range of values.", - "download": "https://dasl.datadescription.com/download/data/3204", - "filename": "Fingers-and-Heights", - "name": "Fingers and Heights", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Is it true that students\ntend to gain weight during their first year in college? Cornell Professor of Nutrition David Levitsky recruited students from two large sections\nof an introductory health course. Although they were\nvolunteers, they appeared to match the rest of the freshman\nclass in terms of demographic variables such as sex\nand ethnicity. The students were weighed during the first\nweek of the semester, then again 12 weeks later. Based\non Professor Levitsky’s data, estimate the mean weight\ngain in first-semester freshmen and comment on the\n“freshman 15.” (Weights are in pounds.)", - "download": "https://dasl.datadescription.com/download/data/3218", - "filename": "Freshman-15", - "name": "Freshman 15", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For humans, pregnancy lasts about 280 days. In other species of animals, the length of time from conception to birth varies. 
Is there any evidence that the gestation period is related to the animal’s life span? The data give Gestation Period (in days) and Life Expectancy (in years) for 18 species of mammals.", - "download": "https://dasl.datadescription.com/download/data/3241", - "filename": "Gestation_", - "name": "Gestation", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Physical therapists measure a patient’s manual dexterity with a simple task. The patient\npicks up small cylinders from a 4 * 4 frame with one hand, flips them over (still with one\nhand), and replaces them in the frame. The task is timed for all 16 cylinders. The tool was originally normed for adults. In a follow-up study, researchers\nused this tool to study how dexterity improves with age in children and establish norms against which to compare a patient’s dexterity.", - "download": "https://dasl.datadescription.com/download/data/3253", - "filename": "Hand-dexterity", - "name": "Hand dexterity", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fitting someone for a hearing aid requires assessing the patient’s hearing ability. In one method of assessment, the patient listens to a tape of 50 English words. The tape is played at low volume, and the patient is asked to repeat the words. The patient’s hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. 
These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", - "download": "https://dasl.datadescription.com/download/data/3261", - "filename": "Hearing", - "name": "Hearing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fitting someone for a hearing aid requires assessing the patient’s hearing ability. In one method of assessment, the patient listens to a tape of 50 English words. The tape is played at low volume, and the patient is asked to repeat the words. The patient’s hearing ability score is the number of words perceived correctly. Four tapes of equivalent difficulty are available so that each ear can be tested with more than one hearing aid. These lists were created to be equally difficult to perceive in silence, but hearing aids must work in the presence of background noise. Researchers had 24 subjects with normal hearing compare two of the tapes when a background noise was present, with the order of the tapes randomized. Is it reasonable to assume that the two lists are still equivalent for purposes of the hearing test when there is background noise? 
Base your decision on a confidence interval for the mean difference in the number of words people might misunderstand.", - "download": "https://dasl.datadescription.com/download/data/3262", - "filename": "Hearing-4-lists", - "name": "Hearing 4 lists", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data hold measurements on people of various ages. The main variable of interest is the level of insulin-like growth factor (igƒ) (J. Clin. Endocrinol. Metab. 78(3): 744–752, March 1994). Each row in the data set corresponds to one individual. See also Igf13, which concentrates on children. ", - "download": "https://dasl.datadescription.com/download/data/3562", - "filename": "Igf", - "name": "Igf", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Measurements on children under 13 years of age. Most of the data was collected from physical examinations in schools. The main variable of interest is the level of insulin-like growth factor (igƒ) (J. Clin. Endocrinol. Metab. 78(3): 744–752, March 1994). Each row in the data set corresponds to one individual. 
See also the dataset Igf, which includes adults.", - "download": "https://dasl.datadescription.com/download/data/3563", - "filename": "Igf13", - "name": "Igf13", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Homer’s Iliad is an epic poem, compiled around 800 BCE, that describes several weeks of the last year of the 10-year siege of Troy (Ilion) by the Achaeans. The story centers on the rage of the great warrior Achilles. But it includes many details of injuries and outcomes, and is thus the oldest record of Greek medicine. The data report 146 recorded injuries for which both injury site and outcome are provided in the Illiad. Are some kinds of injuries more lethal than others?", - "download": "https://dasl.datadescription.com/download/data/3281", - "filename": "Illiad-Injuries", - "name": "Illiad Injuries", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1974, the Bellevue-Stratford Hotel in Philadelphia was the scene of an outbreak of\nwhat later became known as legionnaires’ disease. The cause of the disease was finally discovered to be bacteria that thrived in the air-conditioning units of the hotel.\nOwners of the Rip Van Winkle Motel, hearing about the Bellevue-Stratford, replace their air-conditioning system. The data are the bacteria counts in the air of eight rooms, before and after a new air-conditioning system was installed (measured in colonies per cubic foot of air). 
Has the new system has succeeded in lowering the bacterial count?", - "download": "https://dasl.datadescription.com/download/data/3310", - "filename": "Legionnaires-disease", - "name": "Legionnaires disease", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 2015 the Council of Europe published a report entitled The European School Survey Project on Alcohol and Other Drugs (www.espad.org). Among other issues, the survey investigated the percent-ages of 16-year-olds who had used marijuana. The data are the results for 38 European countries. ", - "download": "https://dasl.datadescription.com/download/data/3326", - "filename": "Marijuana-2015", - "name": "Marijuana 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Researchers in Food Science studied how big people’s mouths tend to be. They measured mouth volume by pouring water into the mouths of subjects who lay on their backs. Unless this is your idea of a good time, it would be helpful to have a model to estimate mouth volume more simply. Fortunately, mouth volume is related to height. 
(Mouth volume is measured in cubic centimeters and height in meters.)", - "download": "https://dasl.datadescription.com/download/data/3345", - "filename": "Mouth-volume", - "name": "Mouth volume", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A hospital in Nashville is considering changes to the prenatal care they offer. They collected the gestation times of 70 pregnancies that ended in live births. The established human gestation time is 266 days. ", - "download": "https://dasl.datadescription.com/download/data/3359", - "filename": "Nashville", - "name": "Nashville", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Neck size", - "download": "https://dasl.datadescription.com/download/data/3360", - "filename": "Neck-size", - "name": "Neck size", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Paralyzed veterans", - "download": "https://dasl.datadescription.com/download/data/3388", - "filename": "Paralyzed-veterans", - "name": "Paralyzed veterans", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Paralyzed 
Veterans of America (PVA) is a Congressionally chartered veterans’ service organization that represents the interests of paralyzed veterans. The agency provides a range of services to veterans who have spinal cord injury or dysfunction. It derives most of its funding from contributions. The data set PVA contains a sample of the data on donors who recently gave money to the organization.", - "download": "https://dasl.datadescription.com/download/data/3415", - "filename": "PVA", - "name": "PVA", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "People with spinal cord injuries may lose function in some, but not all, of their muscles. The ability to push oneself up is particularly important for shifting position when seated and for transferring into and out of wheelchairs. Surgeons compared two operations to restore the ability to push up in children. ", - "download": "https://dasl.datadescription.com/download/data/3479", - "filename": "Tendon-transf", - "name": "Tendon transfers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Drug interaction study of a new and a standard oral contraceptive \ntherapy. 
See the \"STORY BEHIND THE DATA\" and \"PEDAGOGICAL NOTES\" \nsections below for details.", - "download": "http://jse.amstat.org/datasets/ocdrug.dat.txt", - "filename": "ocdrug", - "name": "Drug Interaction", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Bacteria are cultured in medical laboratories to identify them so patients can be treated \ncorrectly. The tryptone dataset contains measurements of bacteria counts following the \nculturing of five strains of Staphylococcus aureus. There are many strains of \nStaphylococcus aureus; five were used by the experimenter. They are identified by numbers \nin the data because their names are too complicated to be useful as identifiers. The \ndataset also contains the time of incubation, temperature of incubation and concentration \nof tryptone, a nutrient. The protocols for culturing this bacteria, set the time at 24 \nhours, the temperature at 35 degrees and the tryptone concentration at 1.0%. The question \nis whether the conditions recommended in the protocols for the culturing of these strains \nare optimal. The task is to find the incubation time, temperature and tryptone concentration \nthat optimises the growth of this Bacterium.", - "download": "http://jse.amstat.org/datasets/Tryptone.dat.txt", - "filename": "Tryptone", - "name": "The Tryptone Task ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch1b.dat is the waste site file, and contains the \nfollowing variables. 
There are NO missing values.\n\nx: Real, x-coordinate of location of an inactive hazardous waste\nsite containing trichloroethylene (TCE).\n\ny: Real, y-coordinate of location of an inactive hazardous waste\nsite containing trichloroethylene (TCE).\n\nsite: Integer, numerical label of waste site.\n Key: Site 1: Monarch Chemicals\n Site 2: IBM Endicott\n Site 3: Singer\n Site 4: Nesco\n Site 5: GE Auburn\n Site 6: Solvent Savers\n Site 7: Smith Corona\n Site 8: Victory Plaza\n Site 9: Hadco\n Site 10: Morse Chain\n Site 11: Groton", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch1b.dat", - "filename": "Disease-Clusters", - "name": "Spatial Pattern Analysis to Detect Rare Disease Clusters", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch17.dat contains the following 15 variables:\n\nVariable Description\n\nOBS Observation number\nCN Center obtaining and reading the scan\nID Scan ID\nBA1 Bone area (sq cm) from centralized Reader 1\nBA2 Bone area (sq cm) from centralized Reader 2\nBA3 Bone area (sq cm) from centralized Reader 3\nBC1 Bone mineral content (gm) from centralized Reader 1\nBC2 Bone mineral content (gm) from centralized Reader 2\nBC3 Bone mineral content (gm) from centralized Reader 3\nBMD1 Bone mineral density (gm/sq cm) from centralized Reader 1\nBMD2 Bone mineral density (gm/sq cm) from centralized Reader 2\nBMD3 Bone mineral density (gm/sq cm) from centralized Reader 3\nBA Bone area (sq cm) from participating center\nBC Bone mineral content (gm) from participating center\nBMD Bone mineral density (gm/sq cm) from participating center\n", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch17.dat", - "filename": "Bone-Mineral", - "name": "Quality Control for Bone Mineral Density Scans", - 
"number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch21a.dat contains the spontaneous activity and rectal\ntemperature data (416 observations of 6 variables) There are no missing values.\n\nVariable List:\n\nOBS:\t\tObservation identification number.\n\nMORPHINE:\tDose of morphine sulfate (mg/kg) injected into study mice. The \n\t\trange is 0 to 8.0.\n\nDEL9_THC:\tDose of Delta9-THC (mg/kg) injected into study mice. The \n\t\trange is from 0 to 15.0.\n\nREP:\t\tIdentification of study replication. The entire 5x7 factorial \n\t\tdesign was replicated.\n\nSPON_ACT:\tSpontaneous Activity as defined by the number of interruptions \n\t\tof a photocell beam in a clear plastic cage over a 10 minute \n\t\tperiod of time.\n\nTEMP_B:\t\tRectal Temperature at baseline (just prior to treatment).\n\nTEMP_60:\tRectal Temperature at 60 minutes post treatment injection.\n\n\n", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch21a.dat", - "filename": "Drug-Interactions", - "name": "Drug Interactions Between Morphine and Marijuana\n", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch21b.dat contains the tail-flick data (510 observations of \n6 variables) Missing data are encoded with a period. \n\nVariable List:\n\nOBS:\t\tObservation identification number.\n\nREP:\t\tIdentification of study. Two 5x7 factorial experiments and one \n\t\t5x5 factorial experiment are included.\n\nMORPHINE:\tDose of morphine sulfate (mg/kg) injected into study mice. 
The \n\t\trange is 0 to 8.0.\n\nDEL9_THC:\tDose of Delta9-THC (mg/kg) injected into study mice. The \n\t\trange is from 0 to 15.0.\n\nFLICK_C:\tControl Flick Time. The number of seconds required for the \n\t\tmouse to flick it tail from beneath a heat stimulus prior to \n\t\ttreatment.\n\nFLICK_T::\tTest Flick Time. The number of seconds required for the \n\t\tmouse to flick it tail from beneath a heat stimulus post \n\t\ttreatment. A 10 sec maximum latency was imposed.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch21b.dat", - "filename": "Drug-Interactions-2", - "name": "Drug Interactions Between Morphine and Marijuana\n", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Other" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Why do older people often seem not to remember things as well as younger people? Do they not pay attention? Do they just not process the material as thoroughly? One theory regarding memory is that verbal material is remembered as a function of the degree to which is was processed when it was initially presented. Eysenck (1974) randomly assigned 50 younger subjects and 50 older (between 55 and 65 years old) to one of five learning groups. The Counting group was asked to read through a list of words and count the number of letters in each word. This involved the lowest level of processing. The Rhyming group was asked to read each word and think of a word that rhymed with it. The Adjective group was asked to give an adjective that could reasonably be used to modify each word in the list. The Imagery group was instructed to form vivid images of each word, and this was assumed to require the deepest level of processing. 
None of these four groups was told they would later be asked to recall the items. Finally, the Intentional group was asked to memorize the words for later recall. After the subjects had gone through the list of 27 items three times they were asked to write down all the words they could remember. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nYounger or Older\n\nProcess\n\nThe level of processing: Counting, Rhyming, Adjective, Imagery or Intentional\n\nWords\n\nNumber of words recalled\n", - "download": "http://www.statsci.org/data/general/eysenck.txt", - "filename": "eysenck", - "name": "Age and Memory", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Nolen-Hoeksema and Morrow (1991) had the good fortune to have measured depression among college students 2 weeks before the Loma Prieta earthquake in California in 1989. Nolen-Hoeksema and Morrow collected repeat data to track the students’ adjustments to the earthquake. Measurements were taken every 3 weeks starting 2 weeks before the earthquake to 10 weeks after. The data were recreated by Howell (1999) based on the Nolen-Hoeksema and Morrow findings. 
Each row gives the depression scores for one student.\n\n\nVariable\n\nDescription\n\n\n\n\n\nWeek0\n\nDepression scores 2 weeks before the earthquake\n\nWeek3\n\nDepression scores one week the quake\n\nWeek6\n\nDepression scores 4 weeks after the quake\n\nWeek9\n\nDepression scores 7 weeks after the quake\n\nWeek12\n\nDepression scores 10 weeks after the quake\n", - "download": "http://www.statsci.org/data/general/lomaprie.txt", - "filename": "lomaprie", - "name": "Depression Before and After an Earthquake", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": " In a random sample of U.S. adults surveyed in December 2011, Pew Research asked how important it is “to you personally” to be successful in a high-paying career or profession. Responses are recorded by sex and age. ", - "download": "https://dasl.datadescription.com/download/data/3071", - "filename": "Being-successful", - "name": "Being successful", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A researcher at Cornell University wanted to know how friendship might affect simple sales such as this. She randomly divided subjects into two groups and gave each group descriptions of items they might want to buy. One group was told to imagine buying from a friend whom they expected to see again. 
The other group […] ", - "download": "https://dasl.datadescription.com/download/data/3090", - "filename": "Buy-from-a-friend", - "name": "Buy from a friend", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The September 1998 issue of the American T\nPsychologist published an article by Kraut et al. that\nreported on an experiment examining “the social and\npsychological impact of the Internet on 169 people in\n73 households during their first 1 to 2 years online.” In the\nexperiment, 73 households were offered free Internet access\nfor 1 or 2 years in return for allowing their time and activity\nonline to be tracked. The members of the households who\nparticipated in the study were also given a battery of tests\nat the beginning and again at the end of the study. The\nconclusion of the study made news headlines: Those who\nspent more time online tended to be more depressed at the\nend of the experiment.\nThe news reports about this study clearly concluded that\nusing the Internet causes depression. Is such a conclusion warranted?", - "download": "https://dasl.datadescription.com/download/data/3158", - "filename": "Depression-and-the-internet", - "name": "Depression and the internet", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A Harvard psychologist recruited 75 female hotel maids to participate in a study. 
She randomly selected 41 of them, whom she informed (truthfully) that the work they do satisfies the Surgeon General’s recommendations for an active lifestyle, providing examples to show that their work is good exercise. The other 34 were told nothing. Various characteristics, such as weight, body fat, body mass index and blood pressure were recorded at the start of the study and again after four weeks. The researcher was interested in whether the information she provided would result in measurable physical changes. If there is a difference, it might challenge our understanding of the placebo effect because being informed could make a difference.", - "download": "https://dasl.datadescription.com/download/data/3273", - "filename": "Hotel-maids", - "name": "Hotel maids", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In an experiment to test ginkgo bloba, subjects were assigned randomly to take ginkgo biloba supplements or a placebo. Their memory was tested to see whether it improved. ", - "download": "https://dasl.datadescription.com/download/data/3335", - "filename": "Memory", - "name": "Memory", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The New York Times combined survey data (economix.blogs.nytimes.com/2013/\n07/10/working-parents-wanting-fewer-hours/) with data from\nthe U.S. Bureau of Labor Statistics (BLS) (www.bls.gov/news\n.release/archives/famee_04262013.htm) comparing how mothers\nand fathers would like to allocate their time compared with\nwhat they actually do. 
They asked a sample of parents with\nchildren 18 or under:\n“If money were no object, and you were free to do whatever\nyou wanted, would you stay at home, would you work full\ntime, or would you work part time?”\nPercent of respondents to this question choosing each\nalternative are reported in the “Desire” columns of the table.\nData in the “Actual” column are from the BLS. (Note:\n“Unemployed” = unemployed and actively seeking work.)\nThe table reports column percents (which may not add to\n100% due to rounding)", - "download": "https://dasl.datadescription.com/download/data/3344", - "filename": "Mothers-fathers-aspirations", - "name": "Mothers and fathers aspirations", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In a study published in the journal Psychological Science, Rauscher, Shaw, and Ky reported that when students were given a spatial reasoning section of a standard IQ test, those who listened to Mozart for 10 minutes improved their scores more than those who simply sat quietly. ", - "download": "https://dasl.datadescription.com/download/data/3350", - "filename": "Mozart", - "name": "Mozart", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Researchers interviewed participants to find some who reliably fell asleep and awoke on one side and who could remember their dreams. They found 63 participants, of whom 41 were right-side sleepers and 22 slept on their left side. Then they interviewed them about their dreams. 
Of the 41 right-side sleepers, only 6 reported often having nightmares. But of the 22 left-side sleepers 9 reported nightmares. Is the difference significant?", - "download": "https://dasl.datadescription.com/download/data/3366", - "filename": "Nightmares", - "name": "Nightmares", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Stereograms appear to be composed entirely of\nrandom dots. However, they contain separate images that a\nviewer can “fuse” into a three-dimensional (3D) image by staring\nat the dots while defocusing the eyes. An experiment was\nperformed to determine whether knowledge of the embedded\nimage affected the time required for subjects to fuse the images.\nOne group of subjects (group NV) received no information or\njust verbal information about the shape of the embedded object.\nA second group (group VV) received both verbal information\nand visual information (specifically, a drawing of the object).\nThe experimenters measured how many seconds it took for the\nsubject to report that he or she saw the 3D image.", - "download": "https://dasl.datadescription.com/download/data/3459", - "filename": "Stereograms", - "name": "Stereograms", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch20.dat contains the following variables:\n\nid subject identifier\nclinical indicator for selection into clinical sample:\n 1=in clinical sample; 0=not in clinical sample\nstratum stratum membership:\n 1=high screen; 2=low screen blacks;\n 3=low screen whites\nrace subject's 
self-reported race:\n 1=white; 2=black\ngender subject's gender:\n 1=male; 2=female\nrparents subject's guardian status:\n 1=does not live with both natural parents;\n 0=lives with both natural parents\ncesdtot subject's total center for epidemiologic studies depression\n scale score (range 0-60)\ncohtot subject's total cohesion score, based on faces-ii\n (range 16-80)\nmdd clinical diagnosis of major depression:\n 1=positive diagnosis; 0=negative diagnosis\n 9=missing for subjects not in clinical sample\nweight sampling weights used in logistic regression; defined as\n number of subjects in screening sample in each stratum", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch20.dat", - "filename": "Adolescent-Depression", - "name": "Two-Stage Sampling Designs for Adolescent Depression Studies", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Psychology" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study examined the health risks of smoking measured the cholesterol levels of people who had smoked for at least 25 years and people of similar ages who had smoked for no more than 5 years and then stopped", - "download": "https://dasl.datadescription.com/download/data/3111", - "filename": "Cholesterol-and-smoking", - "name": "Cholesterol and smoking", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data on 816 brands of cigarettes. What relationships are there among the nicotine content, tars, and CO? Are any brands unusually high or low in nicotine? Can you account for that? 
", - "download": "https://dasl.datadescription.com/download/data/3113", - "filename": "Cigarettes", - "name": "Cigarettes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Researchers measured the concentration (nanograms per milliliter) of cotinine in the blood\nof three groups of people: nonsmokers who have not been exposed to smoke, nonsmokers\nwho have been Exposed To Smoke (ETS), and smokers. Cotinine is left in the blood when\nthe body metabolizes nicotine, so its value is a direct measurement of the effect of passive smoke exposure.", - "download": "https://dasl.datadescription.com/download/data/3389", - "filename": "Passive-smoke", - "name": "Passive smoke", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Centers for Disease Control and Prevention\ntrack cigarette smoking in the United States. How has the percentage of people who smoke changed since the danger became clear during the last half of the 20th\ncentury? 
The data give percentages of smokers among\nmen 18–24 years of age, as estimated by surveys, from 1965\nthrough 2014.", - "download": "https://dasl.datadescription.com/download/data/3455", - "filename": "Smoking-2014", - "name": "Smoking 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "There has been a steady decline the the percentage of pregnant mothers who smoke. These data document the trend. The run only until 2011, which appears to be the latest date for which the CDC has data. ", - "download": "https://dasl.datadescription.com/download/data/3456", - "filename": "Smoking-and-Pregnancy-2011", - "name": "Smoking and Pregnancy 2011", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Measurements of weight and tar, nicotine, and carbon monoxide content\nare given for 25 brands of domestic cigarettes.", - "download": "http://jse.amstat.org/datasets/cigarettes.dat.txt", - "filename": "Cigarette_", - "name": "Cigarette data for an introduction to multiple regression", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. 
\nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19a.dat", - "filename": "never-smokers", - "name": "never-smokers", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. 
\nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19b.dat", - "filename": "current-smokers-m", - "name": "current smokers: male", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19c.dat", - "filename": "current-smokers-f", - "name": "current smokers: female", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. 
\nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19d.dat", - "filename": "former-smokers-mnc", - "name": "former smokers: male, no college", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. 
\nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19e.dat", - "filename": "former-smokers-msc", - "name": "former smokers: male, some college ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19f.dat", - "filename": "former-smokers-fnc", - "name": "former smokers: female, no college", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. 
\nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occured.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch19g.dat", - "filename": "former-smokers-fsc", - "name": "former smokers: female, some college", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Smoking" - } - ] - }, - { - "category_name": "Nature", - "subcategories": [ - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily rainfall (in millimetres) was recorded over a 47-year period in Turramurra, Sydney, Australia. For each year, the wettest day was identified (that having the greatest rainfall). The data show the rainfall recorded for the 47 annual maxima.", - "download": "http://www.statsci.org/data/oz/sydrain.txt", - "filename": "sydrain", - "name": "Annual Maximums of Daily Rainfall in Sydney", - "number_format": 31, - "remove_quotes": true, - "separator": "auto", - "simplify_whitespaces": true, - "skip_empty_parts": false, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data were collected in a cloud-seeding experiment in Tasmania between mid-1964 and January 1971. The rainfalls are period rainfalls in inches. 
\nSeeded\n - \nS = seeded, U = unseeded\nSeason\n - \nAutumn, Winter, Spring Summer\nTE\n - \nrainfall in east target area\nTW\n - \nrainfall in west target area\nNC\n - \nrainfall in north control area\nSC\n - \nrainfall in south control area\nNWC\n - \nrainfall in north-west conrol area\n\n", - "download": "http://www.statsci.org/data/oz/cloudtas.txt", - "filename": "cloudtas", - "name": "Cloud Seeding in Tasmania", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data are monthly averaged atmospheric pressure differences between Easter Island and Darwin, Australia. This difference drives the trade winds in the southern hemisphere. An annual cycle may be expected, and also longer cycles corresponding to the El Nino and to the Southern Oscillations. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPressure\n\nMonthly average atmospheric pressure differences\n", - "download": "http://www.statsci.org/data/oz/enso.txt", - "filename": "enso", - "name": "Pressure Difference between Easter Island and Darwin", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily rainfall for Melbourne, from 1 January 1981 to 31 December 1990. 
Note that this series is 3 observations longer than the temperature series.", - "download": "http://www.statsci.org/data/oz/melbrain.txt", - "filename": "melbrain", - "name": "Melbourne Daily Rainfall", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily minimum and maximum temperatures for Melbourne, from 1 January 1981 to 31 December 1990. The two February 29 leap days are excluded, so there are 10 x 365 = 3650 observations.", - "download": "http://www.statsci.org/data/oz/melbtemp.txt", - "filename": "melbtemp", - "name": "Melbourne Temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Rainfall for each 6-day period for Adelaide from 1839 to 1977 inclusive. December 31 of the previous year is included in the non-leap years to make 15 6-day periods for each year. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1839 - 1977\n\nPeriod\n\n1 - 61 for each year\n\nRainfall\n\nRainfall in \n", - "download": "http://www.statsci.org/data/oz/adelrain.txt", - "filename": "adelrain", - "name": "Adelaide Rainfall", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily 6am and 3pm temperatures for Brisbane for the decade 1977 - 1986. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDay\n\nDay as YearMonthDay\n\nTemp06\n\n6am Temperature in degrees Celsius x 10\n\nTemp15\n\n3pm Temperature in degrees Celsius x 10\n", - "download": "http://www.statsci.org/data/oz/bristemp.txt", - "filename": "bristemp", - "name": "Brisbane Temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The columns in the data set represent the precipitation weighted mean concentrations of ions for the year 1986, for 47 sites in the United Kingdom. \n \nVariable\n \nDescription\n\nSite\n\nSite number \n\nRain\n\nRain (measured in mm) \n\nH\n\nH+ \n\nSO4\n\nSO4-2 \n\nNO3\n\nNO3- \n\nNH4\n\nNH4+ \n\nx\n\nx-coordinate (measured in cm) \n\ny\n\ny-coordinate (cm) \n\nThe measurement of NH4+ for site number 35 was not available and is represented by NA in the data set. The x- and y-cordinates were measured in cm from a map of the UK. ", - "download": "http://www.statsci.org/data/general/rainuk.txt", - "filename": "rainuk", - "name": "Acid Rain in the UK", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Froliger and Kane measured the pH (a scale on which a value of 7 is neutral and values below 7 are acidic) of water collected from precipitation events in Allegheny County, Pennsylvania between December 20, 1973 and May 23, 1974. Display the distribution of these values and describe with words and numbers what you see. 
", - "download": "https://dasl.datadescription.com/download/data/3041", - "filename": "acid-rain", - "name": "Acid rain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the average January Temperature (in degrees Fahrenheit) and Latitude (in degrees north of the equator) for 59 U.S. cities. How are they related? ", - "download": "https://dasl.datadescription.com/download/data/3114", - "filename": "City-climate", - "name": "City climate", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3115", - "filename": "City-temperatures", - "name": "City temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Global temperature from https://www.ncdc.noaa.gov/cag/data-info/global Global temperature anomaly data come from the Global Historical Climatology Network-Monthly (GHCN-M) data set and International Comprehensive Ocean-Atmosphere Data Set (ICOADS), which have data from 1880 to the present. These two datasets are blended into a single product to produce the combined global land and ocean temperature anomalies. 
The available timeseries of global-scale temperature anomalies are calculated with respect to the 20th century average, while the mapping tool displays global-scale temperature anomalies with respect to the 1981-2010 base period. For more information on these anomalies, please visit Global Surface Temperature Anomalies. CO2 from ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_annmean_mlo.txt DJIA from https://www.measuringworth.com\n\nScientists claim that changes in the mean global temperature are primarily due to changes in CO2 levels. Both trends are here from 1959 to 2016. For an alternative, the data includes the annual closing price of the Dow Jones Industrial Average. Can it predict global temperature?", - "download": "https://dasl.datadescription.com/download/data/3116", - "filename": "Climate-change-2016", - "name": "Climate change 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Hurricane frequencies", - "download": "https://dasl.datadescription.com/download/data/3279", - "filename": "Hurricane-frequencies", - "name": "Hurricane frequencies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Hurricane history", - "download": "https://dasl.datadescription.com/download/data/3280", - "filename": "Hurricane-history", - "name": "Hurricane history", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - 
"comment_character": "#", - "create_index_column": false, - "description": "The barometric pressure at the center of a hurricane is often used to measure the strength of the hurricane because it can predict the maximum wind speed of the storm. How well is the wind speed predicted by the barometric pressure? ", - "download": "https://dasl.datadescription.com/download/data/3278", - "filename": "Hurricanes-2015", - "name": "Hurricanes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Los Angeles Almanac reports a number of variables about the weather in LA. Among them is the annual rainfall, reported here for 1991-2018. It is worthwhile to look up any outliers. ", - "download": "https://dasl.datadescription.com/download/data/3555", - "filename": "LA-rainfall", - "name": "LA rainfall", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Is global climate change leading to an increase in the number of major hurricanes? 
The data gives the number of hurricanes classified as major hurricanes in the Atlantic Ocean each year from 1944 through 2013, as reported by NOAA: ", - "download": "https://dasl.datadescription.com/download/data/3323", - "filename": "Major-hurricane-2013", - "name": "Major hurricanes 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tornadoes 2015\nSource: www.nws.noaa.gov/om/hazstats/resources/weather_fatalities.pdf", - "download": "https://dasl.datadescription.com/download/data/3488", - "filename": "Tornadoes", - "name": "Tornadoes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tracking hurricanes 2015", - "download": "https://dasl.datadescription.com/download/data/3493", - "filename": "Tracking-hurricanes-2015", - "name": "Tracking hurricanes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The National Hurricane Center (NHC) of the National Oceanic and Atmospheric\nAdministration (NOAA) tries to predict the path each hurricane will take. But hurricanes\ntend to wander around aimlessly and are pushed by fronts and other weather\nphenomena in their area, so they are notoriously difficult to predict. Even relatively small changes in a hurricane’s track can make big differences in the damage it causes. 
The data give the mean error in nautical miles of the NHC’s 72-hour predictions of Atlantic hurricanes for 1970-2017. NOAA refers to these errors as the Forecast\nerror or the Prediction error and reports annual results.", - "download": "https://dasl.datadescription.com/download/data/3494", - "filename": "Tracking-hurricanes-2016", - "name": "Tracking hurricanes 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tsunamis 2016", - "download": "https://dasl.datadescription.com/download/data/3500", - "filename": "Tsunamis-2016", - "name": "Tsunamis 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://www.ngdc.noaa.gov/hazard/tsu_db.shtml Extracted Event Validity 3 and 4 Cause Codes 1-5 Event Validity: 4 = definite tsunami 3 = probable tsunami 2 = questionable tsunami 1 = very doubtful tsunami 0 = event that only caused a seiche or disturbance in an inland river -1 = erroneous entry Cause Code: Valid values: 0 to 11 The source of the tsunami: 0 = Unknown 1 = Earthquake 2 = Questionable Earthquake 3 = Earthquake and Landslide 4 = Volcano and Earthquake 5 = Volcano, Earthquake, and Landslide 6 = Volcano 7 = Volcano and Landslide 8 = Landslide 9 = Meteorological 10 = Explosion 11 = Astronomical Tide", - "download": "https://dasl.datadescription.com/download/data/3501", - "filename": "Tsunamis-2018", - "name": "Tsunamis 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - 
"use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Weather forecasts", - "download": "https://dasl.datadescription.com/download/data/3519", - "filename": "Weather-forecasts", - "name": "Weather forecasts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Wind speed", - "download": "https://dasl.datadescription.com/download/data/3528", - "filename": "Wind-speed", - "name": "Wind speed", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch3a.dat includes the validation data collected at the stationary\nambient monitoring site. The variables are:\n\n 1. Date, in MM/DD/YY format,\n DC\n 2. 12-hour average daytime continuous ozone concentration, X ,\n 1\n DP\n 3. 12-hour average daytime passive ozone concentration, X ,\n 1\n NC\n 4. 12-hour average nighttime continuous ozone concentration, X , and\n 1\n NP\n 5. 12-hour average nighttime passive ozone concentration, X .\n 1", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch3a.dat", - "filename": "Ozone_", - "name": "Prediction Models for Personal Ozone Exposure Assessment", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch3b.dat includes the personal ozone exposure data. 
The \nvariables are:\n\n 1. Subject identification number, ranging from 1 to 23,\n\n 2. Date, in MM/DD/YY format,\n\n 3. Home region, ranging from 1 to 6,\n\n 4. 12-hour average daytime personal ozone concentration, Y,\n\n 5. 12-hour average daytime continuous ozone concentration at the\n DC\n stationary site, X ,\n 1\n\n 6. 12-hour average nighttime continuous ozone concentration at the\n NC\n stationary site, X ,\n 1\n O\n 7. 24-hour average home outdoor passive ozone concentration, X ,\n 1\n DI\n 8. 12-hour average home indoor daytime passive ozone concentration, X ,\n 1\n NI\n 9. 12-hour average home indoor nighttime passive ozone concentration, X ,\n 1\n\n 10. Prediction values for a 12-hour microenvironmental model based\n H\n on hourly ozone concentrations, X ,\n 2\n O\n 11. Fraction of time spent anywhere outdoors, X ,\n 3\n I\n 12. Fraction of time spent at home indoors, X , and\n 3\n\n 13. Indicator variable for whether the child stayed near the\n S\n home for the whole day, X , where 1 = yes, 0 = no.\n 3", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch3b.dat", - "filename": "Ozone2", - "name": "Prediction Models for Personal Ozone Exposure Assessment", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Weather" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ocean swell produces spectacular eruptions of water through a hole in the cliff at Kiama, about 120km south of Sydney, known as the Blowhole. The times at which 65 successive eruptions occurred from 1340 hours on 12 July 1998 were observed using a digital watch. \nJim Irish writes \nAnyone who has visited the Blowhole more than once knows that the rate and volume of eruptions varies. This variation occurs at several timescales. 
We might expect that part is explained by the tides, so that eruptions are more frequent and spectacular when the tide is very high, and eruptions obviously depend on the presence of a large ocean swell generated by prolonged strong winds over the ocean well offshore from Kiama. Hence, any stochastic model fitted to data observed over a short period of time is only applicable to that period, and perhaps a few hours either side of the observations. But we might infer from the model fitted to those data that a similar model applies more generally. ", - "download": "http://www.statsci.org/data/oz/kiama.txt", - "filename": "kiama", - "name": "Kiama Blowhole Eruptions", - "number_format": 31, - "remove_quotes": true, - "separator": "auto", - "simplify_whitespaces": true, - "skip_empty_parts": false, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data records the length of rivers in the South Island of New Zealand. The lengths are given in kilometres. The second variable, FlowsInto, indicates whether the river flows into the Pacific Ocean (0) or the Tasman Sea (1). A map of the island's rivers is included here.", - "download": "http://www.statsci.org/data/oz/nzrivers.txt", - "filename": "nzrivers", - "name": "Length of New Zealand Rivers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Date on the concentration of polychlorinated biphenyl (PCB) residues in a series of lake trout from Cayuga Lake, NY, were reported in Bache et al (1972). The ages of the fish were accurately known, because the fish were annually stocked as yearlings and distinctly marked as to year class. 
Each whole fish was mechanically chopped, ground, and thoroughly mixed, and 5-gram samples taken. The samples were treated and PCB residues in parts per million (ppm) were estimated using column chromatography. \nBates and Watts (1988) use a linear model \nlog(PCB) = b1 + b2 Age1/3 \nbut they remark that the nonlinear model \nlog(PCB) = b1 + b2 Ageq \nis slightly better. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of trout (years)\n\nPCB\n\nPCB concentration (ppm)\n", - "download": "http://www.statsci.org/data/general/troutpcb.txt", - "filename": "troutpcb", - "name": "PCB Concentrations in Lake Trout", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Jaffe, Parker and Wilson have investigated the concentration of several hydrophobic organic substances (such as hexachlorobenzene, chlordane, heptachlor, aldrin, dieldrin, endrin) in the Wolf River in Tennessee. Measurements were taken downstream of an abandoned dump site that had previously been used by the pesticide industry to dispose of its waste products. \nIt was expected that these hydrophic substances might have a nonhomogeneous vertical distribution in the river because of differences in density between these compounds and water and because of the adsorption of these compounds on sediments, which could lead to higher concentrations on the bottom. It is important to check this hypothesis because the standard procedure of sampling at six-tenths of the depth could miss the bulk of these pollutants if the distribution were not uniform. \nGrab samples were taken with a La Motte-Vandorn water sampler of 1 litre capacity at various depths of the river. 
This sampler consists of a horizontal plexiglas tube of 7 centimetres diameter and a plunger of each side which shuts the sampler when the sampler is at the desired depth. Ten surface, 10 mid-depth and 10 bottom samples were collected, all within a relatively short period. Until they were analysed the samples were stored in 1-quart mason jars at low temperature. \nIn the analysis of the samples, a 250-millilitre water sample was taken from each mason jar and was extracted with 1 millilitre of either hexanes or petroleum ether. A sample of the extract was then injected into a gas chromatograph and the output was compared against standards of known concentrations. The test procedure was repeated two more times, injecting different samples of the extract in the gas chromatograph. The average aldrin and hexachlorobenzene (HCB) concentrations (in nanograms per liter) in these 30 samples are given in the data.", - "download": "http://www.statsci.org/data/general/wolfrive.txt", - "filename": "wolfrive", - "name": "Wolf River Pollution", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The following data from the Statistical Abstract of the United States give the number of accidental oil spills at sea and the amount of oil lost in these spills for the years 1973 - 1985. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nYear\n\nYear\n\nSpills\n\nNumber of spills\n\nOil\n\nAmount of oil lost (thousands of metric tonnes)\n\n\n\n", - "download": "http://www.statsci.org/data/general/spills.txt", - "filename": "spills", - "name": "Accidental Oil Spills", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data refer to a survey of the fauna on the sea bed lying between the coast of northern Queensland and the Great Barrier Reef. The sampling region covered a zone which was closed to commercial fishing, as well as neighbouring zones where fishing was permitted. In view of the large numbers and types of species captured in the survey the catch was summarized as a score, on a log weight scale, which combines information across species. Two such scores are available. The details of the survey, and a full analysis of the data, are in Poiner et al (1997). \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nZone\n\nan indicator for the closed (1) and open (0) zones \n\nYear\n\nan indicator of 1992 (0) or 1993 (1) \n\nLatitude\n\nlatitude of the sampling position \n\nLongitude\n\nlongitude of the sampling position \n\nDepth\n\nbottom depth \n\nScore1\n\ncatch score 1 \n\nScore2\n\ncatch score 2 \n", - "download": "http://www.statsci.org/data/oz/reef.txt", - "filename": "reef", - "name": "Prawn Trawling in the Great Barrier Reef", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The annual number of deaths from floods in the United states from 1995 through 2015. 
Years are not provided, but the data values are in time order.", - "download": "https://dasl.datadescription.com/download/data/3211", - "filename": "Floods-2015", - "name": "Floods 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Climate scientists have been observing the extent of sea ice using satellite observations. Many have expressed concern because, since 1980, the extent of sea ice has declined precipitously—possibly due to global climate change. But a multiple regression of Extent on temp and year gives a coefficient for temp that is essentially zero. ", - "download": "https://dasl.datadescription.com/download/data/3443", - "filename": "Sea-ice", - "name": "Sea ice", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "As part of the course work, a class at an upstate\nNY college collects data on streams each year. 
Students\nrecord a number of biological, chemical, and physical variables,\nincluding the stream name, the substrate of the stream\n(limestone (L), shale (S), or mixed (M)), the pH, the temperature\n(\u001dC), and the BCI, a measure of biological diversity.", - "download": "https://dasl.datadescription.com/download/data/3463", - "filename": "Streams", - "name": "Streams", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch2.dat contains the following variables:\n\n animal - a unique identifier associated with each C. dubia tested\n conc - concentration (micro grams/L)\n brood1 - number of young produced in the first brood\n brood2 - number of young produced in the second brood\n brood3 - number of young produced in the third brood\n total - sum of young produced in the 3 broods (=brood1 + brood2 + brood3)", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch2.dat", - "filename": "Pollutants", - "name": "Assessing Toxicity of Pollutants in Aquatic Systems ", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For a selection of months in the period 1970 to 1983, a measurement of the\nocean salinity at a depth of 100 meters off the Alaskan coast, given in parts\nper thousand. Columns are:\n\n 1. year\n 2. month\n 3. 
salinity", - "download": "http://lib.stat.cmu.edu/crab/salinity", - "filename": "salinity-2", - "name": "ocean salinity", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For a selection of months in the period 1970 to 1983, a measurement of the\nocean temperature at a depth of 100 meters off the Alaskan coast, given in\ndegrees Celsius. Columns are:\n\n 1. year\n 2. month\n 3. temperature", - "download": "http://lib.stat.cmu.edu/crab/celsius", - "filename": "celsius", - "name": "ocean temperature", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Waters" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Groups of dolphins were observed off the coast of Iceland near Keflavik in 1998. The data here give the time of the day and the main activity of the group, whether travelling quickly, feeding or socializing. The dolphin groups varied in size - usually feeding or socializing groups were larger than travelling groups. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nActivity\n\nMain activity of group: travelling (Travel), feeding (Feed) or socializing (Social)\n\nPeriod\n\nTime of the day: Morning, Noon, Afternoon or Evening\n\nGroups\n\nNumber of groups observed\n\n\n\n", - "download": "http://www.statsci.org/data/general/dolpacti.txt", - "filename": "dolpacti", - "name": "Activities of Dolphin Groups", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Cairns (1988) analysed the relation between population and foraging area for seabird colonies. The following table presents their data for 22 black-legged kittiwake (a northern gull) colonies of Scotland's Shetland and Orkney Islands. Area is km2 and Population is the number of breeding pairs. ", - "download": "http://www.statsci.org/data/general/kittiwak.txt", - "filename": "kittiwak", - "name": "Kittiwake Colonies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Some handicapped people have access to trained monkey helpers that can perform household tasks like switching things on and off. This data set gives the number of tasks each of nine monkeys can perform along with the number of years the monkeys have been working with handicapped people. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName \n\nName of monkey\n\nYears \n\nNumber of years the monkey has worked with handicapped people\n\nTasks \n\nNumber of tasks the monkey can perform\n", - "download": "http://www.statsci.org/data/general/monkeys.txt", - "filename": "monkeys", - "name": "Trained Monkeys", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Includes brain and body weight, life span, gestation time, time sleeping, and predation and danger indices for 62 species of mammals. Of interest is to predict the time spent sleeping and the proportion of sleep time in dream sleep. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nBodyWt\n\nbody weight (kg)\n\nBrainWt\n\nbrain weight (g)\n\nNonDreaming\n\nslow wave (\"nondreaming\") sleep (hrs/day)\n\nDreaming\n\nparadoxical (\"dreaming\") sleep (hrs/day)\n\nTotalSleep\n\ntotal sleep, sum of slow wave and paradoxical sleep (hrs/day)\n\nLifeSpan\n\nmaximum life span (years)\n\nGestation\n\ngestation time (days)\n\nPredation\n\npredation index (1-5)\n1 = minimum (least likely to be preyed upon); 5 = maximum (most likely to be preyed upon)\n\nExposure\n\nsleep exposure index (1-5)\n1 = least exposed (e.g. 
animal sleeps in a well-protected den); 5 = most exposed\n\nDanger\n\noverall danger index (1-5) (based on the above two indices and other information)\n1 = least danger (from other animals); 5 = most danger (from other animals)\n\n\n\n", - "download": "http://www.statsci.org/data/general/sleep.txt", - "filename": "sleep_", - "name": "Sleep in Mammals", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Results of horse races at Eagle Farm, Brisbane, on 31 August 1998. The data, collected by Donald Forbes for his MS305 Data Analysis Project, give results for each horse in a sequence of 8 races. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPosition\n\nFinishing position\n\nStarters\n\nNumber of horses in race\n\nLast\n\nFinishing position in last race\n\nSince\n\nDays since last race\n\nNumber\n\nIdentifying number of horse in race\n\nCarried\n\nWeight carried\n\nWeight\n\nHandicap weight\n\nBarrier\n\nBarrier position at start of race\n\nDistance\n\nLength of race\n\nLengths\n\nNumber of lengths that horse finished from winner\n\nOdds\n\nStarting odds\n\nStarts\n\nNumber of races previously started in\n\nAge\n\nAge of horse in years\n\nRatio\n\nProportion of wins in previous starts\n", - "download": "http://www.statsci.org/data/oz/horses.txt", - "filename": "horses", - "name": "Horse Racing at Eagle Farm", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the survival times (in 10 hour units) in a 3 x 4 factorial experiment, the factors being (a) three poisons and (b) four 
treatments. Each combination of the two factors is used for four animals, the allocation to animals being completely randomized. \n", - "download": "http://www.statsci.org/data/general/poison.txt", - "filename": "poison", - "name": "Poison Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data was collected by Peter Drew and Matt Seidemann, statistics students at the Queensland University of Technology, in a subject taught by Dr Margaret Mackisack. Here is their description of the data and its collection: \n\"As keen fishermen out and about on a fairly regular basis, the common arguments arise between anglers on the best rigging set up for various conditions. We decided that upon our next group outing that we would back up our opinions with hard statistical facts. Our interest led us to test the most obvious variables in the fishing rig. \n\"Of interest were firstly the rod length, as between fisherman there always tends to be a variety of rods of different sizes; secondly the type of line, in that the larger the line it would be logical that the weight would increase; thirdly the sinker weight and how it affected the casting distance. \n\"In deciding on the three variables a 2^3 factorial design seemed obvious and for our purposes seemed to be quite adequate. So the question was placed as to whether or not the above variables in any combination made any difference to the overall distance the line was cast. The rods used were 6ft and 7ft two piece boat rods, fitted with the same type of spinning reel. The variable sinkers were 8oz and 12oz round ball sinkers and the line used was either the 1kg or 2kg line of the same make. 
\n\"The experiment was carried out on a day that was close to windless thus lowering the relative influence of the wind. The series of casts was conducted by the same person as were the measurements thus giving uniformity to the total experiment. A break of five minutes was timed between casts so as to allow the caster to allocate the same amount of energy to each cast. The rods were not rigged by the caster; a rigger would set the rod up with a combination of sinker, line and rod, and an effort was made to keep the caster oblivious to the changes in the rig. \n\"The experiment was conducted on the rugby ovals on Oleria St, Brookside (a western surburb of Brisbane) adjacent to the RSL (Returned Serviceman League club), which for all intents and purposes would be classified as a level surface. A line was placed at one end of the field and from it the caster would cast the rod as he would given normal fishing conditions. A spotter who was also the measurer would mark the point of impact of the sinker and from it measure back to the line from which it was cast. The distance observed was subsequently rounded up to the nearest 0.5 of a metre. Two runs were made of each combination. \n\"Possible improvements: Because of the time the rigging took, both casts with each rig were done at the same time. If we did it again it would be better to use random numbers to decide the order of all sixteen casts.\" ", - "download": "http://www.statsci.org/data/oz/fishing.txt", - "filename": "fishing_", - "name": "Fishing Rod Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Four male and four female turtles had their plasma protein measured while they were well fed and after ten and twenty days of fasting. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-8\n\nSex\n\nMale or Female\n\nFed\n\nPlasma protein while well fed (mg/ml)\n\nFasted10\n\nPlasma protein after fasting 10 days\n\nFasted20\n\nPlasma protein after fasting 20 days\n", - "download": "http://www.statsci.org/data/general/turtles.txt", - "filename": "turtles", - "name": "Plasma Protein of Fasting Turtles", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Frogs of four species had their oxygen consumption measured at two temperatures and two exercise levels. There were two frogs of each species at each temperature, and each of the two was measured both at rest and during forced exercise. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-16\n\nSpecies\n\n1-4\n\nTemperature\n\nLow or High\n\nRest\n\nOxygen consumption (ml O2/g/hr) at rest\n\nExercise\n\nOxygen consumption during exercise\n\n\n\n\n", - "download": "http://www.statsci.org/data/general/frogs.txt", - "filename": "frogs_", - "name": "Oxygen Consumption of Frogs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the age and the length of dugongs Dugong dugon (M�ller) captured near Townsville in north Queensland, Australia. The lifespan of a dugong is 50-60 years.\nThese data were working estimates. In particular the method of determining the age of dugong has changed somewhat since the data were recorded. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge in years\n\nLength\n\nLength in metres\n\n\n\n", - "download": "http://www.statsci.org/data/oz/dugongs.txt", - "filename": "dugongs", - "name": "Age and Length of Dugongs near Townsville", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the sound pressure of sonar signals (\"clicks\") from a dolphin at various ranges to target. The measurements were made off the coast of Iceland near Keflavik in 1998. The pressure measurement given is \nraw pressure + a Range \nwhere a is a known constant depending on the water density. Pressure is expected to increase with distance even after the adjustment. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRange \n\nDistance to dolphin in metres\n\nSoundPressure \n\nWater sound pressure adjusted for water density\n", - "download": "http://www.statsci.org/data/general/dolphin.txt", - "filename": "dolphin", - "name": "Sound Pressure of Dolphin Sonar", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The observed responses are Geiger counter counts (times 10-4) used to measure the amount of radioactively tagged sulfate drug in the blood of a baboon named Brunhilda after an injection of the drug. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nTime in hours since injection\n\nSulfate\n\nGeiger counter counts � 10-4\n", - "download": "http://www.statsci.org/data/general/brunhild.txt", - "filename": "brunhild", - "name": "Blood Sulfate in a Baboon Named Brunhilda", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The European rabbit Oryctolagus cuniculus is a major pest in Australia. A reliable method of age determination for rabbits caught in the wild would be of importance in ecological studies. In this study, the dry weight of the eye lens was measured for 71 free-living wild rabbits of known age. Eye lens weight tends to vary much less with environmental conditions than does total body weight, and therefore may be a much better indicator of age \nThe rabbits were born and lived free in an experimental 1.7 acre enclosure at Gungahlin, ACT. The birth data and history of each individual were accurately known. Rabbits in the enclosure depended on the natural food supply. In this experiment, 18 of the eye lenses were collected from rabbits that died in the course of the study from various causes such as coccidiosis, bird predation or starvation. The remaining 53 rabbits were deliberately killed, immediately after being caught in the enclosure or after they had been kept for some time in cages. The lenses were preserved and their dry weight determined. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of rabbit in days\n\nLens\n\nDry weight of eye lens in milligrams\n", - "download": "http://www.statsci.org/data/oz/rabbit.txt", - "filename": "rabbit", - "name": "Age and Eye Lens Weight for Rabbits in Australia", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Insects were exposed to gaseous carbon disulphide for a period of 5 hours. Eight experiments were run with different concentrations of carbon disulphide. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDose\n\nDose of carbon disulphide\n\nExposed\n\nNumber of beetles exposed\n\nMortality\n\nNumber of beetles killed\n", - "download": "http://www.statsci.org/data/general/beetles.txt", - "filename": "beetles", - "name": "Beetle Mortality", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Bill Venables writes: \nGroups of 20 snails were held for periods of 1, 2, 3 or 4 weeks in carefully controlled conditions of temperature and relative humidity. There were two species of snail, A and B, and the experiment was designed as a 4 by 3 by 4 by 2 completely randomized design. At the end of the exposure time the snails were tested to see if they had survived; the process itself is fatal for the animals. The object of the exercise was to model the probability of survival in terms of the stimulus variables, and in particular to test for differences between species. The data are unusual in that in most cases fatalities during the experiment were fairly small. 
\nSpecies\n \nSnail species A or B \nExposure\n \nExposure in weeks (4 levels) \nHumidity\n \nRelative humidity (4 levels) \nTemp\n \nTemperature in degrees Celsius (3 levels) \nDeaths\n \nNumber of deaths \nN \n \nNumber of snails exposed \n", - "download": "http://www.statsci.org/data/oz/snails.txt", - "filename": "snails_", - "name": "Snail Mortality", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Activity of individually caged fiddler crabs under constant conditions for 225 consecutive hours (225 = 9*25 = 9*24 + 8). The activity scale is log(y+1) where y is mean minutes per hour. Examination of the data suggests that the logarithm was base 10. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nActivity\n\nlog(Minutes per hour+1)\n", - "download": "http://www.statsci.org/data/general/fiddler.txt", - "filename": "fiddler", - "name": "Activity of Fiddler Crabs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tidal shrimps from the Brisbane River move up and down the tidal area (harbour pylon for example) in accordance with the movement of the tides. In this experiment shrimps were removed from their natural environment and isolated from environmental stimulae which would allow them to measure time. Their vertical position on an inclined slope was recorded every half hour starting 20 hours after removal and continuing for one week. Also recorded is the actual tide height during the same period, and six other measures of the shrimps' activity. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime \n\nHours since isolation\n\nVertical \n\nVertical displacement from original position\n\nY2 - Y7 \n\nOther activity measurements\n\nTide \n\nActual tide height\n", - "download": "http://www.statsci.org/data/oz/shrimp.txt", - "filename": "shrimp_", - "name": "Movement of Tidal Shrimps in Isolation", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Monthly total number of pigs slaughtered in Victoria, from January 1980 to August 1995.", - "download": "http://www.statsci.org/data/oz/pigs.txt", - "filename": "pigs", - "name": "Pigs Slaughtered in Victoria", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Seed predators and herbivores can operate as strong selective agents in the evolution of plant defence. In this context, Delpino (1886) posed the \"ant-guard\" hypothesis to explain the role of extrafloral nectaries on plants. Extrafloral nectaries (EFN), distributed on species in over 80 plant families, occur on vegetative organs and \"outer floral parts\" not directly associated with pollination. Basically, the hypothesis states that extrafloral nectar production attracts pugnacious \"bodyguards\" (usually ants) which by their foraging activities deter the activities of herbivorous insects and seed predators. \nSince its inception, the ant-guard hypothesis has remained controversial. 
A few careful studies have experimentally demonstrated that ants attending EFN protect plants (von Wettstein, 1889; Inouye and Taylor, 1979; Schemske, 1980) while several recent studies showed no effect (O’Dowd and Catchpole, 1983; Tempel, 1983; Boecklen, 1984). O’Dowd and Catchpole (1983), for example, found that attendance of ants at EFN deterred other insects from developing flowerheads but that their presence decreased neither the numbers of seed predators nor damage to developing flowerheads. The object of this paper is to describe the ant-insect interactions by means of a simple probability model. \nFull experimental detail is provided by O'’Dowd and Catchpole (1983) but an outline is as follows. The plants studied were helichrysum bracteatum. Three sites were chosen in clearings in the Tallaganda State forest, 40 km. southeast of Canberra, and at each site ten pairs of plants were studied. Plants within each pair were of similar initial size and less than 1 metre apart. Within each pair, ants were excluded from one plant, while the other served as a control. The plants were censused once a week for 17 weeks over the reproductive season (from initiation of flowerheads through the postflowering phase). The data recorded for each plant included the number of flowerheads (capitula), the number of capitula with ants, and the total number of other insects. Different species of ants (predominantly Iridomyrmex spp.) and other insects were observed, but in the data here are pooled within each general category. 
\nTo clarify: the first column (Week) lists the week the observation was made, the second (Index) lists the index given to the pair of plants observed, the third (AntCap) is the number of capitula on the plant with ant access, the fourth (ExcCap) is the number of capitula on the plant excluded from ant access, the fifth (Ants) is the number of capitula that have ants present on them, the sixth column (AntIns) is the number of insects on the plant with ant access, and the seventh (ExcIns) is the number of insects on the plant excluded from ant access. Index number 1-10 refer to Site 1, 11-20 to Site 2 and 21-30 to Site 3.", - "download": "http://www.statsci.org/data/oz/ants.txt", - "filename": "ants", - "name": "Ant-Insect Interactions on Flowerheads", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A new type of heart valve has been developed and is implanted in 63 dogs that have been raised on various levels of exercise. The numbers of valve transplants that succeed are recorded. Is the proportion of successful implants the same for dogs on all exercise regimens? Is there a trend with amount of exercise in the proportion of successful implants? 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nExercise\n\nAmount of exercise: 1=None, 2=Slight, 3=Moderate, 4=Vigorous\n\nImplant\n\n1=Successful, 2=Unsuccessful\n\nFrequency\n\nNumber of dogs\n\n\n\n", - "download": "http://www.statsci.org/data/general/exervalv.txt", - "filename": "exervalv", - "name": "Heart Valves in Dogs on Different Exercise Regimens", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give growth measurements on Tammar wallabies (Macropus eugenii). Each line is a set of measurements on an animal at a particular time. Most lengths are in tenths of millimetres. The data from some animals is very fragmentary. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nAnim\n\nAnimal number\n\nSex\n\n1=male, 2=female\n\nLoca\n\nLocation of animal\n\nLeng\n\nLength of animal (tenths of a millimetre)\n\nHead\n\nHead length\n\nEar\n\nEar Length\n\nArm\n\nArm length\n\nLeg\n\nLeg length\n\nPres\n\nPes (foot) length\n\nTail\n\nTail length\n\nWeight\n\nWeight (tenths of a gram)\n\nAge\n\nAge in days from birth\n", - "download": "http://www.statsci.org/data/oz/wallaby.txt", - "filename": "wallaby", - "name": "Dryandra Tammar WallabyGrowth of Tammar Wallabies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Following the Second World War, D. L. Serventy carried out a detailed study of the lifecycle of the Tasmanian muttonbird (Puffinus tenuirostris, often called the short-tailed shearwater). The data here concerns the growth pattern of fledgling birds of this species. 
\nAfter the eggs hatch, the parent birds spend much time away from the next, and with increasing time their returns become rarer and rarer. When they return the young bird feed copiously, and there is very rapid weight-gain; whilst they are absent, the offspring loses weight. The result is not a smooth growth curve such as one finds in most measurements in developing animals and birds, but a 'sawtooth' effect. The data were collected in 1954 as weighings each morning of two fledgling chicks on Fisher Island, Bass Strait, and each set terminates on the day the chick left the nest. \nMuch of the interest in these curves comes not from the description they give of the weight of the chick, but from the information they contain on the feeding patterns of the parents. There are three obvious features of the data; the timing of the feeds and the size of the feeds when they occur, both of which represent aspects of the feeding pattern of the parents; and the loss in weight of the chicks between feeds. Henstridge and Tweedie (1984) proposed a model, similar to those used in storage theory, which describes each of these phenomena separately.", - "download": "http://www.statsci.org/data/oz/muttonbi.txt", - "filename": "muttonbi", - "name": "Growth of Tasmanian Muttonbirds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Dimensions in millimetres are given of two samples of jellyfish from Hawkesbury River in New South Wales, Australia. One of the samples came from Dangar Island and the other from Salamander Bay. The first column contains a \"D\" if the measurement came from Dangar Island and a \"S\" if it came from Salamander Bay. The dimensions measured were length and width. 
What can one learn from graphing the two principal components? Try graphing principal components of the logarithms of the measurements. Can the dimensions determine the location?", - "download": "http://www.statsci.org/data/oz/jellfish.txt", - "filename": "jellfish", - "name": "Dimensions of Jellyfish", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study was conducted concerning the counts of lesions produced on membranes of chick embryos by viruses of the pox group. The data give the numbers of lesions formed at a series of dilutions of the viral medium. \n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDilution\n\nDilution of viral medium, from 1 to 32\n\nCount\n\nNumber of lesions\n", - "download": "http://www.statsci.org/data/general/pocklesi.txt", - "filename": "pocklesi", - "name": "Pock Lesions on Chick Embryos", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This data comes from an experiment on induction of flowering of cyclamen. Plants of 4 varieties of cyclamen were subject to a combination of 6 temperature regimens and 4 levels of fertilization. The temperature regimens are combinations of five temperatures during the day (14, 16, 18, 20 and 26 degrees C) and four temperatures during the night (14, 16, 18 and 20 C). Not all the combinations of temperatures are present. The response is the number of flowers, which vary from 4 to 26, with mode 8. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nVariety\n\nVariety of cyclamen\n\nRegimem\n\nTemperature regimen (combination of the temperature during the day and the temperature during the night)\n\nDay\n\nTemperature during the day (Centigrade)\n\nNight\n\nTemperature during the night\n\nFertilizer\n\nLevel of fertilization\n\nFlowers\n\nNumber of flowers\n", - "download": "http://www.statsci.org/data/general/cyclamen.txt", - "filename": "cyclamen", - "name": "Number of Cyclamen Flowers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In an experiment where pregnant mice were exposed to the herbicide 2,4,5-T (the active component in Agent Orange), the number of fetal implants in utero were recorded. The data give the frequency distribution of implants at each of seven dose levels measured in mg/kg of body weight. \nOn days 6-14 after mating, pregnant dams were dosed by gavage with one of the doses of 2,4,5-T. Prior to giving birth, the dams were sacrificed and the number of viable, dead and reabsorbed foetuses in the uterus of the dam were determined. The data here gives the number of surviving viable implants. An outcome of zero implants cannot be distinguished from a non-pregnant outcome so any zero implant outcomes were excluded. 
\n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDose\n\nDose of 2,4,5-T in mg/kg/day\n\nImplants\n\nNumber of surviving implants\n\nFrequency\n\nNumber of mice with that number of implants\n", - "download": "http://www.statsci.org/data/general/fetaimpl.txt", - "filename": "fetaimpl", - "name": "Fetal Implants in Mice Utero", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3074", - "filename": "Bird-Species-2013", - "name": "Bird-Species-2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The ranges inhabited by the Indian gharial\ncrocodile and the Australian saltwater crocodile overlap in\nBangladesh. Suppose a very large crocodile skeleton is found\nthere, and we wish to determine the species of the animal.\nWildlife scientists have measured the lengths of the heads\nand the complete bodies of several crocs (in centimeters) of\neach species.\n", - "download": "https://dasl.datadescription.com/download/data/3147", - "filename": "Crocodile-lengths", - "name": "Crocodile lengths", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 2004, a team of researchers published a study of contaminants in farmed salmon. Fish from many sources were analyzed for 14 organic contaminants. 
The study\nexpressed concerns about the level of contaminants found. One of those was the\ninsecticide mirex, which has been shown to be carcinogenic and is suspected to be\ntoxic to the liver, kidneys, and endocrine system. The dataset holds 153 observed salmon samples and reports concentrations of a number of contaminant.", - "download": "https://dasl.datadescription.com/download/data/3199", - "filename": "Farmed-salmon", - "name": "Farmed salmon", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Wildlife researchers monitor many wildlife populations by taking aerial photographs. Can they estimate the weights of alligators accurately from the air? Here are data on the Weight of alligators (in pounds) and their Length (in inches). ", - "download": "https://dasl.datadescription.com/download/data/3236", - "filename": "Gators", - "name": "Gators", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Maine lobster fishing industry is carefully controlled and licensed, and facts about it have been recorded for more than a century, so it is an important industry that we can examine in detail. 
The dataset holds annual data ", - "download": "https://dasl.datadescription.com/download/data/3317", - "filename": "Lobsters-2016", - "name": "Lobsters 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Manatees are gentle mammals that live in the waters off the coast of Florida and a few other places. Unfortunately, many are killed each year in collisions with powerboats. Marine biologists warn that the growing number of powerboats registered in Florida threatens the existence of manatees. The data here are the number of manatees killed each year since 1982 and the number of powerboats registered in Florida (in thousands) for those years. Is there a relationship?", - "download": "https://dasl.datadescription.com/download/data/3325", - "filename": "Manatees-2015", - "name": "Manatees 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Psychology experiments sometimes involve testing the\nability of rats to navigate mazes. The mazes are classified\naccording to difficulty, as measured by the mean length of\ntime it takes rats to find the food at the end. One researcher\nneeded a maze that will take rats an average of about one minute\nto solve. 
He tested one maze on several rats, collecting the\ndata provided.", - "download": "https://dasl.datadescription.com/download/data/3333", - "filename": "Maze", - "name": "Maze", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Can pleasant smells improve learning? Researchers timed 21 subjects as they tried to complete paper-and-pencil mazes. Each subject attempted a maze both with and without the presence of a floral aroma. Subjects were randomized with respect to whether they did the scented trial first or second. Is there any evidence that the floral scent improved the subjects’ ability to complete the mazes?", - "download": "https://dasl.datadescription.com/download/data/3334", - "filename": "Mazes-smells", - "name": "Mazes and smells", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Emperor penguins are the most accomplished divers among birds, making routine\ndives of 5–12 minutes, with the longest recorded dive over 27 minutes. These\nbirds can also dive to depths of over 500 meters! 
Since air-breathing animals like\npenguins must hold their breath while submerged, the duration of any given dive\ndepends on how much oxygen is in the bird’s body at the beginning of the dive, how\nquickly that oxygen gets used, and the lowest level of oxygen the bird can tolerate.\nThe rate of oxygen depletion is primarily determined by the penguin’s heart rate.\nConsequently, studies of heart rates during dives can help us understand how these\nanimals regulate their oxygen consumption in order to make such impressive dives.The researchers equipped emperor penguins with devices that record their heart rates during\ndives. The dataset reports Dive Heart Rate (beats per minute), the Duration\n(minutes) of dives, and other related variables.", - "download": "https://dasl.datadescription.com/download/data/3391", - "filename": "Penguins", - "name": "Penguins", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Salmon", - "download": "https://dasl.datadescription.com/download/data/3435", - "filename": "Salmon", - "name": "Salmon", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The number of storks in Oldenburg, Germany, plotted against the population of the town for 7 years in the 1930s. Do storks bring babies? 
", - "download": "https://dasl.datadescription.com/download/data/3462", - "filename": "Storks", - "name": "Storks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Large herds of wild horses can become a problem on some federal lands in the West. Researchers hoping to improve the management of these herds collected data to see if they could predict the number of foals that would be born based on the size of the current herd. ", - "download": "https://dasl.datadescription.com/download/data/3524", - "filename": "Wild-horses", - "name": "Wild horses", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset consists of a few variables that may influence the demand for Beef in the United States. It provides an example of the influence of inflation in monetary time series data as well as providing some interesting statistical features in building demand models in regression.", - "download": "http://jse.amstat.org/v22n1/kopcso/BeefDemand.txt", - "filename": "BeefDemand", - "name": "Beef Demand", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "159 fishes of 7 species are caught and measured. Altogether there are\n8 variables. 
All the fishes are caught from the same lake\n(Laengelmavesi) near Tampere in Finland.", - "download": "http://jse.amstat.org/datasets/fishcatch.dat.txt", - "filename": "fishcatch", - "name": "fishcatch", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A cost of increased reproduction in terms of reduced longevity has been\nshown for female fruitflies, but not for males. The flies used were an\noutbred stock. Sexual activity was manipulated by supplying individual\nmales with one or eight receptive virgin females per day. The\nlongevity of these males was compared with that of two control types.\nThe first control consisted of two sets of individual males kept with\none or eight newly inseminated females. Newly inseminated females will\nnot usually remate for at least two days, and thus served as a control\nfor any effect of competition with the male for food or space. The\nsecond control was a set of individual males kept with no females.\nThere were 25 males in each of the five groups, which were treated\nidentically in number of anaesthetizations (using CO2) and provision of\nfresh food medium.", - "download": "http://jse.amstat.org/datasets/fruitfly.dat.txt", - "filename": "fruitfly", - "name": "Sexual activity and the lifespan of male fruitflies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch4a.dat contains the burlap data, with the following variables:\n\n1. mburlap = mean burlap count value obtained over 12 subplot values.\n\n2. 
megg = mean egg mass density per acre obtained over 21 subplot values.\n\n3. seegg = estimated standard error of mean egg mass density obtained\nover 21 subplot values.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch4a.dat", - "filename": "Gypsy-Moth", - "name": "Measurement Error Models for Gypsy Moth Studies", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch4b.dat contains the defoliation data, with the following variables:\n\n1. mdef = mean defoliation value obtained from 20 subplot values.\n\n2. sedef = estimated standard error of mean defoliation\nobtained from 20 subplot values.\n\n3. megg = mean estimated egg mass density obtained over 20 subplots\n\n4. seegg = estimated standard error of mean egg \nmass density obtained from 20 subplot values.\n\n5. cdefegg = estimated covariance of mean defoliation and mean egg mass\ndensity obtained from 20 subplot values.\n", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch4b.dat", - "filename": "Gypsy-Moth2", - "name": "Measurement Error Models for Gypsy Moth Studies", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch7.dat contains the following variables:\n\nNo - observation number (1,...,294).\nTIME - survival time of halibut (time until death) in hours.\n (NOTE the Table 1 in the book claims survival time is in minutes,\n but HOURS is the correct unit)\nCENSOR - censoring indicator. 
1=uncensored observation;\n 0=censored observation.\nTOWD - duration (in minutes) of time trawl net was towed on the bottom.\nDELDEPTH - difference between maximum and minimum depth observed during tow\n (depth measured in meters).\nLENGTH - fork length of halibut in centimeters.\nHANDTIME - handling time (in minutes) between net coming on board vessel \n and fish being placed in holding tanks.\nLOGCAT - natural logarithm of total catch of fish in tow.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch7.dat", - "filename": "Atlantic-Halibut", - "name": "Survival Analysis for Size Regulation of Atlantic Halibut", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch9.dat contains the following variables:\n\nBIRD : Bird id. \nRX : 1=NT, 2=PT, 3=FT, standing for \"No Tape\" (NT), in which no visible\n guides connected light cues\n with the feeders below them; \"Partial Tape\" (PT), in which fluorescent\n orange Dymo type provided a discontinuous (i.e., broken in two places) \n connection between each light cue and its feeder; and \"Full Tape\"\n (FT), in which the visible guide between each light cue and\n its feeder (fluorescent orange Dymo tape) was continuous.\n Feeding continued for 180 trials.\nGENDER : 0=male, 1=female. 
\nOUTCOME: 0=failure 1= success.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch9.dat", - "filename": "Hummingbirds", - "name": "Spatial Association Learning in Hummingbirds\n", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch10.dat contains eight variables, with 30 cases for each.\nEach case refers to a site in the forest. The first variable,\n'random', is a character variable indicating whether the site is a\nspotted owl nest site (=N) or a site selected at random\ncoordinates (=R). Variables 2-8 contain the percents of mature forest\n(>80 years of age). The variable names indicate the outer radii of the\nrings in which the percents were calculated. They are: 0.91km,\n1.18km, 1.40km, 1.60km, 1.77km, 2.41km, and 3.38km. So, for example,\nthe variable '1.18km' contains the percents of mature forest in\nrings with outer radius 1.18km and inner radius .91km centered at \nthe different sites.", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch10.dat", - "filename": "Habitat-Association", - "name": "Habitat Association Studies of the Northern Spotted\nOwl, Field Grouse, and Flammulated Owl\n", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch11a.dat contains a body temperature time series for an\nadult female beaver (Castor canadensis) obtained December 12-13, 1990 \nat Sandhill Wildlife Area, Wisconsin. Observations were made at 10\nminute intervals. 
These observations follow a random pattern of\nfluctuations, typically observed during freeze-up for all beaver in\nthis study. \n\nVariable List:\n\nObservation No.\nJulian day\nTime\nBody temperature (degrees C) \nActivity (0 = animal inside retreat; 1 = animal outside retreat) \n", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch11a.dat", - "filename": "Beaver-Body-Temperatures", - "name": "Time-Series Analyses of Beaver Body Temperatures", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch11b.dat contains a body temperature time series for\na subadult female beaver (Castor\ncanadensis). Observations were made at Sandhill Wildlife Area,\nWisconsin, November 3-4, 1990 (before freeze-up). Temperature\nobservations follow a plateau pattern, typically observed during\nthe entire ice-free period (late spring to late autumn). Only the\nfirst 100 observations are included in this data set.\n\nVariable list:\n\nObservation number\nJulian day\nTime\nBody temperature (degrees C)\nActivity (0 = animal inside retreat; 1 = animal outside retreat)", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch11b.dat", - "filename": "Beaver-Body-Temperatures2", - "name": "\nTime-Series Analyses of Beaver Body Temperatures\n", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The main data set consists of king crab pot survey data for the years 1973\nthrough 1986. 
The surveys were conducted in the waters around Kodiak Island,\nAlaska, using pots similar to the pots used by the commercial fishing fleet.\n(A crab pot is a trap that resembles a wooden crate.) A fixed sampling grid\nwas used to place strings of pots (stations) consisting usually of 10 pots in\nopen ocean, or of 2-5 pots in bays. The pots were left in the water for\nperiods of 16-24 hours, removed, and the crab counts recorded. The survey was\nconducted each summer, 2-4 weeks prior to start of the commercial fishing\nseason. The crab counts are classified by size (roughly representing age) and\nsex into six categories.\n\nThe basic survey data is a file \"survey\", containing a 3,450 by 14 matrix\nwith these columns:\n\n 1. Year (last two digits)\n 2. Fishing district (one of four)\n 3. Station identifier (alphabetic)\n 4. The number of pots fished\n 5-6. Latitude and longitude of the location halfway between\n the first and last pot of the station\n 7. Number of pre-recruit-4 crab\n 8. Number of pre-recruit-3 crab\n 9. Number of pre-recruit-2 crab\n 10. Number of pre-recruit-1 crab\n 11. Number of recruit males\n 12. Number of post-recruit males\n 13. Number of juvenile females\n 14. Number of adult females", - "download": "http://lib.stat.cmu.edu/crab/survey", - "filename": "survey_", - "name": "Survey", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "==================== Contents of file \"dstns\" ============================\n \nFor each of the years in the survey (1973 to 1986), a frequency distribution\nof the crab by size (in 1 mm increments) that were surveyed. Separate\ndistributions are given for juvenile females, adult females, and all males.\nThe five columns are:\n\n 1. year\n 2. length in mm\n 3. 
count of juvenile females\n 4. count of adult females\n 5. count of all males", - "download": "http://lib.stat.cmu.edu/crab/dstns", - "filename": "dstns", - "name": "dstns", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For each of the 14 years in the survey (1973-86), an estimate of the number of\neggs per female. Columns are:\n\n 1. year\n 2. estimated eggs per adult female", - "download": "http://lib.stat.cmu.edu/crab/eggs", - "filename": "eggs", - "name": "eggs per female", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For each year in the survey, a frequency distribution of all females\ncross-classified by size (in 1 mm increments) and percent clutch fullness (5\ncategories). Clutch fullness is, roughly, the realized egg-bearing potential\nof a female crab. The seven columns are:\n\n 1. year\n 2. size, in mm\n 3. count of females with 0% fullness\n 4. count of females with 1-29% fullness\n 5. count of females with 30-59% fullness\n 6. count of females with 60-89% fullness\n 7. 
count of females with 90-100% fullness", - "download": "http://lib.stat.cmu.edu/crab/fullness", - "filename": "fullness", - "name": "Clutch fullness", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Animals" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Risk and Sammarco (1991) found that the density of the Great Barrier Reef coral Porites lobata increases with distance from the Australian shore, due to differences between inshore and offshore environments. They made three measurements at each of nine reefs at various distances from the shore. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nReef\n\nName of reef\n\nDistance\n\nDistance to shore (km)\n\nDensity\n\nCoral head density (g/cm3)\n", - "download": "http://www.statsci.org/data/oz/coralden.txt", - "filename": "coralden", - "name": "Density of Great Barrier Reef Coral Heads", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the volume (cubic feet), height (feet) and diameter (inches) (at 54 inches above ground) for a sample of 31 black cherry trees in the Allegheny National Forest, Pennsylvania. The data were collected in order to find an estimate for the volume of a tree (and therefore the timber yield), given its height and diameter. 
", - "download": "http://www.statsci.org/data/general/cherry.txt", - "filename": "cherry", - "name": "Volume of Black Cherry Trees", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data is from a dew-retting experiment in Ballarat 1942-43, in which flax was laid out under various climatic conditions and for various periods. Retting involves softening the flax stems by soaking in water, thus enabling the separation of the linen fibres from the wooden material by a process called scutching. The flax variety used was \"Liral Crown\". Two samples were taken from each trial and the ret loss, as a percentage, was calculated. The other three variables are the mean daily rainfall (in points), the retting period (in days) and the mean daily temperature (in degrees Fahrenheit).", - "download": "http://www.statsci.org/data/oz/retloss.txt", - "filename": "retloss", - "name": "Ret Loss in Flax", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A production plant cost-control engineer is responsible for cost reduction. One of the costly items in his plant is the amount of water used by the production facilities each month. He decided to investigate water usage by collecting seventeen observations on his plant's water usage and other variables. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTemperature\n\nAverage monthly temperature (F)\n\nProduction\n\nAmount of production (M pounds)\n\nDays\n\nNumber of plant operating days in the month\n\nPersons\n\nNumber of persons on the monthly plant payroll\n\nWater\n\nMonthly water usage (gallons)\n", - "download": "http://www.statsci.org/data/general/water.txt", - "filename": "water_", - "name": "Water Usage of Production Plant", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Ryan et al (1994) describe the data as follows: \nIn autumn, small winged fruit called samara fall off maple trees, spinning as they go. A forest scientist studied the relationship between how fast they fell and their \"disk loading\" (a quantity based on their size and weight). The samara disk loading is related to the aerodynamics of helicopters. \nThe data give the loadings and fall velocities for fruit from three trees. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTree\n\n1 to 3\n\nLoading\n\nDisk loading\n\nVelocity\n\nFall velocity\n", - "download": "http://www.statsci.org/data/general/samara.txt", - "filename": "samara", - "name": "Fall Velocities for Samara Fruit", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The yield of pasture regrowth was measured together with the number of days since last grazing. The measurements were done on different experimental units so it is reasonable to assume the errors independent. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDays\n\nDays since last grazing\n\nYield\n\nYield of pasture\n", - "download": "http://www.statsci.org/data/general/regrowth.txt", - "filename": "regrowth", - "name": "Pasture Regrowth after Grazing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Herbicide bioassay is concerned with the reduction in plant growth as a function of the herbicide dose applied. This is of interest when developing new herbicides, assessing environmental effects on non-target species or estimating the residual herbicides in a treated soil before planting a new, herbicide susceptible crop. A typical experiment would comprise a series of doses ranging from ineffective to severely damaging to establish a dose-response relationship. In this experiment the callus area of a tissue culture of Brassica napus was measured corresponding to different doses of a sulfonylurea herbicide, metsulfuron methyl. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nChlorsulfuron\n\nConcentration of herbicide in nmol/L\n\nCallus\n\nLogarithm of callus area\n", - "download": "http://www.statsci.org/data/general/brassica.txt", - "filename": "brassica", - "name": "Response of Brassica napus to Chlorsulfuron", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data concerns the underground root system of eight separate apple trees. Three different root stocks are considered (Mark, MM106 and M26) and two plant spacing (4x2 meters and 5x3 meters). 
For each plant, soil core sampling units taken have been classified as belonging to an inner or outer zone. The response variable is the density of fine roots, also called the root length density, which can have zeros as well as continuous positive values. There are 511 observations, of which 193 or 38% have a zero response. \nThe design is not a full factorial design: plants 1 and 2 are tested only with the Mark root stock and at a spacing of 5x3; plants 3 and 4 are tested only with Mark root stock at a spacing of 4x2; plants 5 and 6 are tested only with root stock MM106 at a spacing of 5x3; and plants 7 and 8 are tested only with M26 root stock at a spacing of 4x2. The Mark root stock is tested at both plant spacings but the MM106 only at 5x3 and M26 only at 4x2. So there are four unique treatment combinations: Mark stock at 5x3 and 4x2, MM106 at 5x3, and M26 at 4x2. \nIt is of interest to (1) compare effects of spacing within Mark rootstock, (2) compare root stocks within same spacing and (3) to look for any difference in RLD between inner and outer zones. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPlant \n\n1 to 8\n\nStock\n\nRoot stock: Mark, MM106 or M26\n\nSpacing\n\nPlant spacing: 5x3 or 4x2 meters\n\nZone\n\nZone relative to the plant the soil core is taken from: Inner or Outer\n\nRLD\n\nRoot length density in cm/cm3\n", - "download": "http://www.statsci.org/data/oz/fineroot.txt", - "filename": "fineroot", - "name": "Root Length Density of Apple Trees", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A biology student studied the effect of 10 different fertilizers on the growth of mung bean sprouts. She sprouts 12 beans in each of 10 different petri dishes, and adds the same amount of fertilizer to each dish. 
After one week she measures the heights of the 120 sprouts in millimeters. ", - "download": "https://dasl.datadescription.com/download/data/3203", - "filename": "Fertilizers", - "name": "Fertilizers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1936 Sir Ronald Fisher presented data on irises as the example in a famous statistics paper. Ever since, “Fisher’s Iris data” have been a feature of statistics texts. Fisher presents 4 measurements of Iris flowers of three species. Can we differentiate the species? If so, how best to do that?", - "download": "https://dasl.datadescription.com/download/data/3206", - "filename": "Fisher-Irises", - "name": "Fisher’s Irises", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Hopkins Memorial Forest is a 2500-acre reserve in Massachusetts, New York, and Vermont managed by the Williams College Center for Environmental Studies (CES). As part of its mission, the CES monitors forest resources and conditions over the long term. 
", - "download": "https://dasl.datadescription.com/download/data/3271", - "filename": "Hopkins-Forest", - "name": "Hopkins Forest", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "One can determine how old a tree is by counting its rings, but that requires either cutting the tree down or extracting a sample from the tree’s core. Can we estimate the tree’s age simply from its diameter?A forester measured 27 trees of the same species that had been cut down, and counted the rings to determine the ages of the trees. ", - "download": "https://dasl.datadescription.com/download/data/3277", - "filename": "old-tree", - "name": "How old is that Tree", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "As the number of oranges on a tree increases, the fruit tends to get smaller. The dataset gives numbers of oranges/tree and average weight/orange (in pounds).", - "download": "https://dasl.datadescription.com/download/data/3385", - "filename": "Oranges", - "name": "Oranges", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "An experiment on mung beans was performed\nto investigate the environmental effects of salinity and\nwater temperature on sprouting. 
Forty beans were randomly\nallocated to each of 36 petri dishes that were subject\nto one of four levels of Salinity (0, 4, 8, and 12 ppm)\nand one of three Temperatures (32°, 34°, or 36° C). After\n48 hours, the biomass of the sprouts in gm was measured. The percent of beans germinating is also recorded.", - "download": "https://dasl.datadescription.com/download/data/3458", - "filename": "Sprouts", - "name": "Sprouts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tree growth", - "download": "https://dasl.datadescription.com/download/data/3497", - "filename": "Tree-growth", - "name": "Tree growth", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Vineyards", - "download": "https://dasl.datadescription.com/download/data/3513", - "filename": "Vineyards", - "name": "Vineyards", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "John O. Rawlings and Susan E. Spruill\n\nThe data set ch5.dat contains the following variables:\n\n1. site: coded 1-6 corresponding to the location code used in Table 1.\n2. block: block within site coded 1, 2, ... within sites for the RCB designs;\n block=1 for all observations for the CRD designs, sites 5 and 6.\n3. rep: replication within site coded as missing in sites 1-4;\n coded as 1, 2, ... 
for replicates in the CRD design.\n4. ozone: target ozone treatment, coded 0.0=charcoal filtered air, \n 1.0=nonfiltered air, \"x.x\"=target level of ozone as multiple of \n ambient ozone level.\n5. rain: acidic rain treatment, coded as pH of rain solution.\n6. fam: genetic family, coded as 1, 2, ... within sites.\n7. ppmhrs: cumulative ozone exposure (ppm-h) during the two years of\n the trials.\n8. vwpH: cumulative exposure to acidic rain computed as vwpH \n = -log(sum(volume*hydrogen ion concentration)).\n9. biomass: total above ground biomass (g) after two growing seasons.\n10. diam: increment of diameter growth (mm) during the two growing seasons.\n11. DMA: whole-plot component of the covariate initial diameter (mm)\n expressed as the deviation of the whole-plot mean from the overall\n site mean.\n12. DMB: sub-plot component of the covariate initial diameter (mm)\n expressed as the deviation of the subplot mean from the whole-plot mean.\n13. D2HA: whole-plot component of the covariate initial volume, \n approximated as diameter squared times height, and expressed as\n the deviation of the whole-plot mean from the overall site mean.\n14. D2HB: sub-plot component of the covariate initial volume and\n expressed as the deviation of the subplot mean from the whole-plot mean.\n15. DMOT: depth to mottling (cm) of the clay soil; one measurement\n per whole-plot. 
\n\nMissing data are coded with '.'", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch5.dat", - "filename": "Pine-Seedling", - "name": "Estimating Pine Seedling Response to Ozone and Acid Rain", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch8.dat contains the following variables:\n\nPop - population code, 1034 or 1040\nADH - 1 (cepa), 2 (het) or 3 (fist)\nIDH - 1 (cepa), 2 (het) or 3 (fist)\nPGI - 1 (cepa), 2 (het) or 3 (fist)\nfreq - frequency", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch8.dat", - "filename": "Hybrid-Onions", - "name": "\nMixture Fraction and Linkage Analyses for Hybrid Onions", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Plants" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data were collected from a mine in Cobar, NSW, Australia. At each of 38 sampling points, several measurements were taken, one of which is the 'true-width' of an ore-bearing rock layer. Also given are the co-ordinates t1 and t2 of the data sites. 
Green and Silverman (1994) use this data set to illustrate thin-plate splines for fitting a smooth surface.", - "download": "http://www.statsci.org/data/oz/ore.txt", - "filename": "ore", - "name": "Wide of Ore-Bearing Layer", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The proportions of sand, silt and clay in soil samples are given for 8 contiguous sites. The sites extended over the crest and flank of a low rise in a valley underlain by marl near Albudeite in the province of Murcia, Spain. The sites were small areas of ground surface of uniform shape internally and delimited by relative discontinuities externally. Soil samples were obtained for each site at 11 random points within a 10m by 10m area centred on the mid-point of the site. All samples were taken from the same depth. The data give the sand, silt and clay content of each sample, expressed as a percentage of the total sand, silt and clay content. \nThe purpose of the study by Wright and Wilson (1979) was to determine whether the sites could be differentiated on the basis of their soil composition. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSite \n\n1-8\n\nSand \n\nPercent sand\n\nSilt\n\nPercent silt\n\nClay\n\nPercent clay\n", - "download": "http://www.statsci.org/data/general/murcia.txt", - "filename": "murcia", - "name": "Composition of Soil from Murcia Province, Spain", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Meteor Crater in Arizona was the first recognized impact crater and was identified as such only in the 1920s. With the help of satellite images, more and more craters have been identified; now more than 180 are known. These, of course, are only a small sample of all the impacts the earth has experienced: Only 29% of earth’s surface is land, and many craters have been covered or eroded away. Astronomers have recognized a roughly 35 million-year cycle in the frequency of cratering, although the cause of this cycle is not fully understood.\nThe data hold information about craters. Craters from the most recent 35Ma (million years) may be the more reliable data, and are suitable for analyses relating age and diameter.", - "download": "https://dasl.datadescription.com/download/data/3142", - "filename": "Craters", - "name": "Craters", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The file ch6.dat contains the following variables. 
\n\n\nSTRATA : National Marine Fisheries Service (NMFS) 4 digit strata\n designator in which the sample was taken \n \nSAMPLE : Sample number per year ranging from 1 to approximately 450\n\nLAT : Location in terms of latitude of each sample in the Atlantic Ocean \n\nLONG : Location in terms of longitude of each sample in the Atlantic Ocean\n \nTCATCH : Total number of scallops caught at the ith sample location\n\nPREREC : Number of scallops whose shell length is smaller than 70 millimeters\n \nRECRUITS : Number of scallops whose shell length is 70 millimeters or larger", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch6.dat", - "filename": "Scallop-Abundance", - "name": "Geostatistical Estimates of Scallop Abundance", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Geology" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3094", - "filename": "Carbon-footprint", - "name": "Carbon footprint", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3095", - "filename": "Carbon-footprint-2015", - "name": "Carbon footprint 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": 
"https://dasl.datadescription.com/download/data/3240", - "filename": "Gemstones", - "name": "Gemstones", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "It is a common belief that Yellowstone’s most famous geyser erupts once an hour at very predictable intervals. But, in fact, the intervals between eruptions can vary greatly. Can we predict the interval from, for example, the duration of the previous eruption? Are there other patterns in the data worth noting? ", - "download": "https://dasl.datadescription.com/download/data/3380", - "filename": "Old-Faithful", - "name": "Old Faithful", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Ozone levels (in parts per billion, ppb) were recorded at sites in New Jersey monthly between 1926 and 1971. Here are boxplots of the data for each month (over the 46 years), lined up in order (January = 1): ", - "download": "https://dasl.datadescription.com/download/data/3386", - "filename": "Ozone", - "name": "Ozone", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The National Interagency Fire Center reports statistics about wildfires. They report data from 1960, but the years 1960-1984 are so different from subsequent years that they can’t be analyzed together. These data are for 1985-2015. 
Is there a pattern over time? What is the relationship between the number of fires and the acres affected? Are fires getting larger or smaller on average?", - "download": "https://dasl.datadescription.com/download/data/3523", - "filename": "Wildfires-2015", - "name": "Wildfires 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Geographical coordinates of the shoreline of the 17 islands that form the\nKodiak Island group. The two columns are\n\n 1. latitude\n 2. longitude\n\nmeasured in degrees and fractions of a degree. Each of the 17 groups of\ncoordinates is terminated by a pair of \"NA\"s, and the end of each group loops\nback to the beginning. For drawing maps, bear in mind that longitude is\nmeasured East to West, which is right to left. This suggests plotting\nnegative longitude instead of longitude. 
Also, to draw maps that \"look right\"\nto an Alaskan, you must take into account that in this part of the world the\naspect ratio of one degree latitude (y-axis) to one degree longitude (x-axis)\nis 1:1.8 (in terms of actual ground distance).", - "download": "http://lib.stat.cmu.edu/crab/kodiak", - "filename": "kodiak", - "name": "Geographical coordinates of the shoreline of Kodiak Island group", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Other" - } - ] - }, - { - "category_name": "Statistics", - "subcategories": [ - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the year of founding for 40 New Zealand wineries.", - "download": "http://www.statsci.org/data/oz/wineries.txt", - "filename": "wineries", - "name": "Founding Dates of NZ Wineries", - "number_format": 31, - "remove_quotes": true, - "separator": "auto", - "simplify_whitespaces": true, - "skip_empty_parts": false, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. 
", - "download": "http://www.statsci.org/data/general/auction.txt", - "filename": "auction", - "name": "Selling Price of Antique Grandfather Clocks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The two columns of the data are the prices and year purchased for 124 Mazda cars, as taken from the classified section of the Melbourne Age during the course of 1991. Hence the age of the car at the time can be calculated and used to model car price. ", - "download": "http://www.statsci.org/data/oz/mazdas.txt", - "filename": "mazdas", - "name": "Age and Price of Mazda Cars", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data show the capital value and annual rental value of 96 domestic properties in Auckland in 1991. The aim was to explore their relationship in the hope of being able to predict capital value from rental value, thus the latter is the explanatory variable in this case.", - "download": "http://www.statsci.org/data/oz/rentcap.txt", - "filename": "rentcap", - "name": "Capital and Rental Values of Auckland Properties", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of the clock (years)\n\nBidders\n\nNumber of individuals participating in the bidding\n\nPrice\n\nSelling price (pounds sterling)\n", - "download": "http://www.statsci.org/data/general/auction.txt", - "filename": "auction_", - "name": "Selling Price of Antique Grandfather Clocks", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data were collected to study the variation in rent paid in 1977 for agricultural land planted to alfalfa in Minnesota. The data include: \n\n\nVariable\n\nDescription\n\n\nRent\n \naverage rent per acre planted to alfalfa\nAllRent\n \naverage rent paid for all tillable land\nCows\n \ndensity of diary cows (number per square mile)\nPasture\n \nproportion of farmland used as pasture\nLiming\n \nYes if liming is required to grow alfalfa; No otherwise\n", - "download": "http://www.statsci.org/data/general/landrent.txt", - "filename": "landrent", - "name": "Rent for Land Planted to Alfalfa", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Monthly observations on various share price and financial variables were recorded from October 1991 to August 1997. Data collected by Francine Pritchard and Glen Dixon for their MS305 data analysis project in 1997. 
\n\n\nVariable\n\nDescription\n\n\nBank\n\nShare Price Index\nAllOrds\n\n\nDevelop\n\n\nMining\n\n\nGold\n\n\nBuild\n\n\nProp\n\n\nIndust\n\n\nEnergy\n\n\nFinance\n\n\nResource\n\n\nTransport\n\n\nRetail\n\n\nUnemploy\n\nUnemployment Rate\nCPI\n\nConsumer Price Index\nBankBill\n\n90 Day Bank Bill Interest Rate\n", - "download": "http://www.statsci.org/data/oz/bankbill.txt", - "filename": "bankbill", - "name": "90 Day Bank Bills", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The following data was collected in the 1960s at a house in south-east England. The weekly gas consumption (in 1000 cubic feet) and the average outside temperature (in degrees Celsius) was recorded for 26 weeks before and 30 weeks after cavity-wall insulation had been installed. The house thermostat was set at 20�C throughout. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nInsulate\n\nBefore or After\n\nTemp\n\nAverage outside temperature (C)\n\nGas\n\nGas consumption (1000's of cubic feet)\n", - "download": "http://www.statsci.org/data/general/insulgas.txt", - "filename": "insulgas", - "name": "House Insulation and Gas Consumption", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Age specific term life premium rates for a sum insured of $50,000 are given in the table. The first column is the age of insured, the next two columns are the rates for male smokers and non-smokers, and the last two columns are the rates for female smokers and non-smokers. 
The four separate sets of points may be plotted and cubic spline regression used to fit them.", - "download": "http://www.statsci.org/data/oz/insure.txt", - "filename": "insure", - "name": "Insurance Premiums", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the Canadian automobile insurance experience for policy years 1956 and 1957 as of June 30, 1959. The data includes virtually every insurance company operating in Canada and was collated by the Statistical Agency (Canadian Underwriters' Association - Statistical Department) acting under instructions from the Superintendent of Insurance. The data given here is for private passenger automobile liability for non-farmers for all of Canada excluding Saskatchewan. \nThe variable Merit measures the number of years since the last claim on the policy. The variable Class is a collation of age, sex, use and marital status. The variables Insured and Premium are two measures of the risk exposure of the insurance companies. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMerit\n\nMerit Rating:\n3 - licensed and accident free 3 or more years\n2 - licensed and accident free 2 years\n1 - licensed and accident free 1 year\n0 - all others\n\nClass\n\n1 - pleasure, no male operator under 25\n2 - pleasure, non-principal male operator under 25\n3 - business use\n4 - unmarried owner or principal operator under 25\n5 - married owner or principal operator under 25\n\nInsured\n\nEarned car years\n\nPremium\n\nEarned premium in 1000's\n(adjusted to what the premium would have been had all cars been written at 01 rates)\n\nClaims\n\nNumber of claims\n\nCost\n\nTotal cost of the claim in 1000's of dollars\n", - "download": "http://www.statsci.org/data/general/carinsca.txt", - "filename": "carinsca", - "name": "Canadian Automobile Insurance Claims for 1957-1958", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give details of third party motor insurance claims in Sweden for the year 1977. \n\"In Sweden all motor insurance companies apply identical risk arguments to classify customers, and thus their portfolios and their claims statistics can be combined. The data were compiled by a Swedish Committee on the Analysis of Risk Premium in Motor Insurance. 
The Committee was asked to look into the problem of analyzing the real influence on claims of the risk arguments and to compare this structure with the actual tariff.\" \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nKilometres \n\nKilometres travelled per year\n1: < 1000\n2: 1000-15000\n3: 15000-20000\n4: 20000-25000\n5: > 25000\n\nZone \n\nGeographical zone\n1: Stockholm, Göteborg, Malmö with surroundings\n2: Other large cities with surroundings\n3: Smaller cities with surroundings in southern Sweden\n4: Rural areas in southern Sweden\n5: Smaller cities with surroundings in northern Sweden\n6: Rural areas in northern Sweden\n7: Gotland \n\nBonus\n\nNo claims bonus. Equal to the number of years, plus one, since last claim\n\nMake\n\n1-8 represent eight different common car models. All other models are combined in class 9\n\nInsured\n\nNumber of insured in policy-years\n\nClaims\n\nNumber of claims\n\nPayment\n\nTotal value of payments in Skr\n\n\n\n", - "download": "http://www.statsci.org/data/general/motorins.txt", - "filename": "motorins", - "name": "Third Party Motor Insurance in Sweden", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the average claims for damage to the owner's car for privately owned and comprehensively insured vehicles in Britain in 1975. Averages are given in pounds sterling adjusted for inflation. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nOwnerAge\n\nPolicy-holder's age in years, categorized into 8 levels\n\nModel\n\nType of car, in 4 groups\n\nCarAge\n\nVehicle age in years, categorized into 4 levels\n\nNClaims\n\nNumber of claims\n\nAveCost\n\nAverage cost of each claim in pounds\n", - "download": "http://www.statsci.org/data/general/carinsuk.txt", - "filename": "carinsuk", - "name": "British Car Insurance Claims for 1975", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Monthly data relating to hotels, motels and guesthouses in Victoria, from January 1980 to June 1995. First column: total number of room nights occupied; Second column: total takings from accommodation. ", - "download": "http://www.statsci.org/data/oz/motel.txt", - "filename": "motel", - "name": "Hotels, Motels and Guesthouses in Victoria", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data list the CPI (Consumer Price Index) figures for five countries from 1985 to 1994. The countries are Australia, Canada, New Zealand, the United Kingdom and the United States. 
Each index is based on the December Quarter 1993 (1000).", - "download": "http://www.statsci.org/data/oz/cpifive.txt", - "filename": "cpifive", - "name": "CPI for Five Countries", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly CPI indices for Brisbane for food, clothing, housing etc, from June 1972 to September 1997. \nThe groups are: Food, Clothing, Housing, Household equipment and operation, Transportation, Tobacco and Alcohol, Health and personal care, Recreation and education, and All groups. The CPI are standardized so that the year 1989-90 is 100.0. ", - "download": "http://www.statsci.org/data/oz/cpibris.txt", - "filename": "cpibris", - "name": "Brisbane Consumer Price Indices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly price indices for established homes in Australian capital cities, from June 1986 to June 1997. The price indices are standardized so that the year 1989-1990 is 100.0 for each city. 
", - "download": "http://www.statsci.org/data/oz/houses.txt", - "filename": "houses", - "name": "House Price Indexes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "To shorten the time it takes him to make his favorite pizza, a student designed an experiment to test the effect of sugar and milk on the activation times for baking yeast. Specifically, he tested four different recipes and measured how many seconds it took for the same amount of dough to rise to the […] ", - "download": "https://dasl.datadescription.com/download/data/3042", - "filename": "activating-baking-yeast", - "name": "Activating baking yeast", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. 
Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […] ", - "download": "https://dasl.datadescription.com/download/data/3046", - "filename": "AIG-daily", - "name": "AIG daily", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The American International Group (AIG) was once the 18th largest corporation in the world. By early 2007 AIG had assets of $1 trillion, $110 billion in revenues, 74 million customers and 116,000 employees in 130 countries and jurisdictions. Yet just 18 months later, AIG found itself on the brink of failure and in need of emergency […]", - "download": "https://dasl.datadescription.com/download/data/3047", - "filename": "AIG-monthly", - "name": "AIG monthly", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A sample of model 2011 cars from an online information service colleted to see how fuel efficiency (as highway mpg) relates to the cost (MSRP) ", - "download": "https://dasl.datadescription.com/download/data/3050", - "filename": "All-the-efficiency", - "name": "All the efficiency", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The price of delicious apples and regular gas are components of the Consumer Price Index. 
The data give those prices monthly for the year 2006 ", - "download": "https://dasl.datadescription.com/download/data/3055", - "filename": "Apples-and-gas", - "name": "Apples and gas", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "You have decided to invest in a bond fund and plan to limit your choice of funds to Morningstar “medalist” funds. But now you must choose between a taxable fund and a municipal bond fund that is at least partially tax-free. Which is better? Here are the % returns for the three-year period leading up", - "download": "https://dasl.datadescription.com/download/data/3080", - "filename": "Bond-funds", - "name": "Bond funds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Number of sales people working in a bookstore and sales (in $1000) that day. These are realistic but invented data. ", - "download": "https://dasl.datadescription.com/download/data/3081", - "filename": "Bookstore-sales", - "name": "Bookstore sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 2015, the website NewGeography.com listed its ranking of the best cities for job growth in the United States. 
Nonfarm employment is also provided", - "download": "https://dasl.datadescription.com/download/data/3082", - "filename": "Boomtowns-2015", - "name": "Boomtowns 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Home prices in two neighborhoods near San Francisco. Palo Alto is an older neighborhood and Foster City, a newer one. How do prices compare?", - "download": "https://dasl.datadescription.com/download/data/3104", - "filename": "CA-House-Prices", - "name": "CA House Prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3097", - "filename": "Car-discounts", - "name": "Car discounts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3098", - "filename": "Car-origins", - "name": "Car origins", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The S&P/Case-Shiller Home Price Indices track changes in the value of residential real estate nationally and in 20 metropolitan 
regions. (Some of these indices are actually traded on the Chicago Mercantile Exchange.) The data set Case-Shiller by City gives the monthly index values for each of the 20 cities tracked by the Case-Shiller index and […] ", - "download": "https://dasl.datadescription.com/download/data/3102", - "filename": "Case-Shiller-by-city", - "name": "Case-Shiller by city", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Beginning in 2017, public companies will be required to disclose the ratio of CEO pay to median worker pay. The Glassdoor Economic Research Blog has published the data for 2014. The data includes CEO identities, companies, CEO compensation, median worker compensation (compiled by Glassdoor), and the ratio of CEO to worker compensation.", - "download": "https://dasl.datadescription.com/download/data/3105", - "filename": "CEO-Compensation-2014", - "name": "CEO Compensation 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3106", - "filename": "CEO-Salary-2012", - "name": "CEO Salary 2012", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Coffee is the world’s second largest\nlegal export commodity (after oil) and is the second largest\nsource of foreign exchange for 
developing nations. The\nUnited States consumes about one-fifth of the world’s coffee.\nThe International Coffee Organization (ICO) computes\na coffee price index using Colombian, Brazilian, and\na mixture of other coffee data. Data are provided for the\nmonthly average ICO price index (in $US) from Jan 2009 to December 2017c", - "download": "https://dasl.datadescription.com/download/data/3119", - "filename": "Coffee-prices-2017", - "name": "Coffee-prices-2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The cost of a variety of common items in 576 cities around the world in $, adjusted so that New York, U.S.A. is 100.", - "download": "https://dasl.datadescription.com/download/data/3120", - "filename": "COLall-2016", - "name": "COLall 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Facts about companies selected from the Forbes 500 list for 1986. This is a 1/10 systematic sample from the alphabetical list of companies. 
The Forbes 500 includes all companies in the top 500 on any of the criteria, and thus has almost 800 companies in the list.", - "download": "https://dasl.datadescription.com/download/data/3125", - "filename": "Companies", - "name": "Companies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Facts about companies selected from the Forbes 500 list for 2000", - "download": "https://dasl.datadescription.com/download/data/3595", - "filename": "Companies-Quickstart", - "name": "Companies Quickstart", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3129", - "filename": "Consumer-spending", - "name": "Consumer spending", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3130", - "filename": "Consumer-spending-post-holiday", - "name": "Consumer spending post holiday", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Numbeo.com lists the cost of living (COL) for 576 cities around the world. 
They report the typical cost of a number of staples. The cost of living is made up of many components. These data report a variety of everyday costs. How are they related? Can an overall cost of living be constructed from them?", - "download": "https://dasl.datadescription.com/download/data/3132", - "filename": "Cost-of-living-2016", - "name": "Cost of living 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3135", - "filename": "Cost-of-Living-2017", - "name": "Cost of Living 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Cost of Living Index (Excl. Rent) is a relative indicator of consumer goods prices, including groceries, restaurants, transportation and utilities. Cost of Living Index doesn’t include accommodation expenses such as rent or mortgage. If a city has a Cost of Living Index of 120, it means Numbeo estimates it is 20% more expensive than New York (excluding rent).\nRent Index is an estimation of prices of renting apartments in the city compared to New York City. If Rent index is 80, Numbeo estimates that price of rents in that city is on an average 20% less than the price in New York.\nGroceries Index is an estimation of grocery prices in the city compared to New York City. 
To calculate this section, Numbeo uses weights of items in the “Markets” section for each city.\nRestaurants Index is a comparison of prices of meals and drinks in restaurants and bars compared to NYC.\nCost of Living Plus Rent Index is an estimation of consumer goods prices including rent comparing to New York City.\nLocal Purchasing Power shows relative purchasing power in buying goods and services in a given city for the average wage in that city. If domestic purchasing power is 40, this means that the inhabitants of that city with the average salary can afford to buy on an average 60% less goods and services than New York City residents with an average salary..", - "download": "https://dasl.datadescription.com/download/data/3136", - "filename": "Cost-of-living-2018", - "name": "Cost of living 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Consumer Price Index (CPI) summarizes the cost of a representative market basket\nof goods that includes groceries, restaurants, transportation, utilities, and medical\ncare. Global companies often use the CPI to determine living allowances and salaries\nfor employees. Inflation is often measured by how much the CPI changes from year to\nyear. Relative CPIs can be found for different cities. We have data giving CPI components\nrelative to New York City. 
For New York City, each index is 100(%).", - "download": "https://dasl.datadescription.com/download/data/3139", - "filename": "CPI-Worldwide-2016", - "name": "CPI Worldwide 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A credit card company wants to see how much customers in a particular segment of\ntheir market use their credit card. They have provided data on the amount\nspent by 500 selected customers during a 3-month period and have asked you to\nsummarize the expenditures. (Data are realistic, but disguised for confidentiality.)", - "download": "https://dasl.datadescription.com/download/data/3146", - "filename": "Credit-card-charges", - "name": "Credit card charges", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Peninsula Creameries sells both cottage cheese and ice cream. The CEO recently noticed that in months when the company sells more cottage cheese, it seems to sell more ice cream as well.", - "download": "https://dasl.datadescription.com/download/data/3152", - "filename": "Dairy-sales", - "name": "Dairy sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data on raw diamonds from the internet. Price of a diamond depends on its Carat weight, color, clarity, and cut. 
The data are for 2690 diamonds of a variety of weights, colors, clarity, and cut. What predicts the price? Do the variables need to be reexpressed?", - "download": "https://dasl.datadescription.com/download/data/3161", - "filename": "Diamonds_", - "name": "Diamonds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Dow Jones stock index measures the performance of the stocks of America’s largest companies. A regression of the Dow prices on years 1972–2015 appears to be successful, but the residuals raise some questions.", - "download": "https://dasl.datadescription.com/download/data/3176", - "filename": "Dow-Jones-2015", - "name": "Dow Jones 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly e-commerce retail sales (in millions of dollars) in the United States from 1999 to 2008 ", - "download": "https://dasl.datadescription.com/download/data/3180", - "filename": "E-commerce", - "name": "E-commerce", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "When implementing a packaged\nEnterprise Resource Planning (ERP) system, many companies\nreport that the module they first install is Financial\nAccounting. 
Among the measures used to gauge the\neffectiveness of their ERP system implementation is acceleration\nof the financial close process. The data hold a sample of\n8 companies that report their average time (in weeks) to\nfinancial close before and after the implementation of their\nERP system.", - "download": "https://dasl.datadescription.com/download/data/3191", - "filename": "ERP-Effectiveness", - "name": "ERP Effectiveness", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Sales (in $) for one week were collected for 18 stores in a food store chain in the northeastern United States. The stores and the towns they are located in vary in size.", - "download": "https://dasl.datadescription.com/download/data/3213", - "filename": "Food-sales", - "name": "Food sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The U.S. government provides fuel economy (in miles per gallon) and other information about late model cars sold in the US. How would you model the relationship between fuel economy and engine displacement (in liters)? Are there any cars that don’t fit the model? Can you explain why? 
", - "download": "https://dasl.datadescription.com/download/data/3225", - "filename": "Fueleconomy-2016", - "name": "Fuel economy 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Weekly gas prices for regular gas in the United States as reported by the U.S. Energy Information Administration for 2009 through August 2016 ", - "download": "https://dasl.datadescription.com/download/data/3232", - "filename": "Gas-prices-2016", - "name": "Gas prices 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3233", - "filename": "Gas-Prices-2017", - "name": "Gas Prices 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Monthly gas prices for all grades and all formulations ($/gallon) in the United States as reported by the U.S. Energy Information Administration for 1993 through August 2018. Prices are available at the cite for all weeks. 
Data here are for the final week of each month.", - "download": "https://dasl.datadescription.com/download/data/3234", - "filename": "Gas-prices-2018", - "name": "Gas prices 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Many drivers of cars that can run on regular gas actually buy premium in the belief that they will get better gas mileage. To test that belief, we use 10 cars from a company fleet in which all the cars run on regular gas. Each car is filled first with either regular or premium gasoline, decided by a coin toss, and the mileage for that tankful is recorded. Then the mileage is recorded again for the same cars for a tankful of the other kind of gasoline. We don’t let the drivers know about this experiment.", - "download": "https://dasl.datadescription.com/download/data/3235", - "filename": "Gas-prices-monthly", - "name": "Gas prices monthly", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3238", - "filename": "GDP-state", - "name": "GDP by state", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3239", - "filename": "GDP-growth-2017", - "name": "GDP growth 2017", - "number_format": 31, - 
"remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily opening and closing stock prices (adjusted for splits and dividends) for Google, Inc. from Aug 19, 2004 through June 21, 2013 ", - "download": "https://dasl.datadescription.com/download/data/3247", - "filename": "Google-stock-prices", - "name": "Google stock prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A graphite manufacturer makes long\nrolls of flexible graphite to be used to seal components in\ncombustion engines. The specifications state that the mean\nstrength should be 21.2 ounces per square yard with a\nstandard deviation of 0.29. Further specifications state that\nno roll should have strength less than 20.2 or more than\n22.2 ounces per square yard. If there is a defect in terms\nof the strength of the graphite rolls, the seal will not hold.\nAfter the roll is created, a beta scanner takes readings of\nthe basis weight in ounces per square yard. The data is\nseparated into 10 lanes with 20 scans in each lane. A sample\nconsists of one roll from each lane. 
The results from 20\nsamples are in the data.", - "download": "https://dasl.datadescription.com/download/data/3250", - "filename": "Graphite-production", - "name": "Graphite production", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "WinCo Foods, a large discount grocery\nretailer in the western United States, promotes itself as the lowest priced grocery retailer. In newspaper ads WinCo Foods published a price comparison for products between WinCo and several competing grocery retailers. One of the retailers compared against WinCo was Walmart, also known as a low price competitor. WinCo selected a variety of products, listed the price of the product charged at each retailer, and showed the sales receipt to prove the prices at WinCo were the lowest in the area. 
A sample of the products and their price comparison at both WinCo and Walmart are given.", - "download": "https://dasl.datadescription.com/download/data/3251", - "filename": "Grocery-prices", - "name": "Grocery prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Health expenditures", - "download": "https://dasl.datadescription.com/download/data/3260", - "filename": "Health-expenditures", - "name": "Health expenditures", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. 
", - "download": "https://dasl.datadescription.com/download/data/3266", - "filename": "Historica-Oil-Prices-2016", - "name": "Historical Oil Prices 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Holiday shopping", - "download": "https://dasl.datadescription.com/download/data/3267", - "filename": "Holiday-shopping", - "name": "Holiday shopping", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Holiday spending", - "download": "https://dasl.datadescription.com/download/data/3268", - "filename": "Holiday-spending", - "name": "Holiday spending", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Home depot sales", - "download": "https://dasl.datadescription.com/download/data/3269", - "filename": "Home-depot-sales", - "name": "Home depot sales", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Home Price Index 2017", - "download": "https://dasl.datadescription.com/download/data/3270", - "filename": "Home-Price-Index-2017", - "name": "Home Price Index 2017", - "number_format": 31, - "remove_quotes": true, - 
"separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "House prices and properties in New York. What properties of a house can predict its price? Can we use such a model to identify houses that are extraordinarily expensive or inexpensive? ", - "download": "https://dasl.datadescription.com/download/data/3275", - "filename": "Housing-prices", - "name": "Housing prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "House prices and properties in New York. What properties of a house can predict its price? Can we use such a model to identify houses that are extraordinarily expensive or inexpensive? 
", - "download": "https://dasl.datadescription.com/download/data/3276", - "filename": "Housing-prices-GE19", - "name": "Housing prices GE19", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "How are housing costs related to median family income?", - "download": "https://dasl.datadescription.com/download/data/3283", - "filename": "Income-housing", - "name": "Income and housing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Income vs Hours 2013", - "download": "https://dasl.datadescription.com/download/data/3286", - "filename": "Income-vs-Hours-2013", - "name": "Income vs Hours 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The U.S. Consumer Price Index and year, every 5 years since 1916. These are the values for January of each year. What is the trend? Can we model it with a linear regression? 
", - "download": "https://dasl.datadescription.com/download/data/3291", - "filename": "Inflation-2016", - "name": "Inflation 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Average annual interest rates (banks prime lending) in the United States from 1966 through 2009 ", - "download": "https://dasl.datadescription.com/download/data/3296", - "filename": "Interest-rates-2009", - "name": "Interest rates 2009", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "he amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? Consider a rotating plot of interest rate, mortgage total, and year. ", - "download": "https://dasl.datadescription.com/download/data/3297", - "filename": "Interest-mortgage", - "name": "Interest rates and mortgages", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The amount charged for mortgages may be related to the total value of mortgage loans in the US. Can that relationship be modeled? Does it depend as well on the year? Consider a rotating plot of interest rate, mortgage total, and year. 
", - "download": "https://dasl.datadescription.com/download/data/3298", - "filename": "Interest-mortgage-2015", - "name": "Interest rates and mortgages 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This example is based on 1998 case study written by J. Hunt, E. Landry, and J. Rao as part of the Babson College case series. The data and setting used in this example are based on the actual case study, but the data have been modified and the conclusions are fictitious.", - "download": "https://dasl.datadescription.com/download/data/3308", - "filename": "Komtek-Technologies", - "name": "Komtek Technologies", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Real estate agents want to set correctly\nthe price of a house that’s about to go on the real estate\nmarket. They must choose a price that strikes a balance\nbetween one that is so high that the house takes too long\nto sell and one that’s so low that not enough value will go\nto the homeowner. One appraisal method is the “Comparative\nMarket Analysis” approach by which the market\nvalue of a house is based on recent sales of similar homes\nin the neighborhood. Because no two houses are exactly\nthe same, appraisers have to adjust comparable homes for\nsuch features as extra square footage, bedrooms, fireplaces,\nupgrading, parking facilities, swimming pool, lot size, location,\nand so on. 
The appraised market values and the selling\nprices of 45 homes from the same region are given.", - "download": "https://dasl.datadescription.com/download/data/3328", - "filename": "Market-value", - "name": "Market value", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Marketing managers salaries", - "download": "https://dasl.datadescription.com/download/data/3327", - "filename": "Marketing-managers-salaries", - "name": "Marketing managers salaries", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly median weekly earnings from the first quarter of 2003 through the first quarter of 2013 for men, 25 years of age or older, in the United States ", - "download": "https://dasl.datadescription.com/download/data/3336", - "filename": "Men-weekly-earnings-2013", - "name": "Men’s weekly earnings 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Movie budgets", - "download": "https://dasl.datadescription.com/download/data/3347", - "filename": "Movie-budgets", - "name": "Movie budgets", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - 
"create_index_column": false, - "description": "Does money purchase a good movie? Is the US Gross revenue related to either the budge or the Rotten Tomatoes score? The dataset holds data on 609 recent releases that includes the USGross (in $M), the Budget ($M), the Run Time (minutes), and the score given by the critics on the Rotten Tomatoes website. ", - "download": "https://dasl.datadescription.com/download/data/3349", - "filename": "Movie-profits", - "name": "Movie profits", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Mutual fund flows", - "download": "https://dasl.datadescription.com/download/data/3354", - "filename": "Mutual-fund-flows", - "name": "Mutual fund flows", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "On December 30, 2016, the Standard and Poor’s (S&P) 500 index hit an all-time high. During 2016, the S&P returned 12.25%. Here is a histogram of the 2016 net returns (total return – annual expenses) for Money Magazine’s 50 Best Mutual Funds and ETFs. The net returns are computed from the data given by Money Magazine.", - "download": "https://dasl.datadescription.com/download/data/3353", - "filename": "Mutual-funds-2016", - "name": "Mutual funds 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A study by the U.S. 
Small\nBusiness Administration used historical data to model the\nGDP per capita of 24 of the countries in the Organization\nfor Economic Cooperation and Development(OECD). The researchers hoped to show that more regulation leads to lower GDP/Capita. The multiple regression with all terms does have a significant P-value for Economic Regulation Index.\nHowever, Primary Education is not a significant predictor. If it is removed from the model, then OECD Regulation is no longer significant at .05. Was it added to the model just to judge the P-value of OECD regulation down to permit a publication that claimed an effect?\nCheck to see whether you think there is such an effect.", - "download": "https://dasl.datadescription.com/download/data/3373", - "filename": "OECD-economic-regulations", - "name": "OECD economic regulations", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "OECD GDP", - "download": "https://dasl.datadescription.com/download/data/3374", - "filename": "OECD-GDP", - "name": "OECD GDP", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "OECD GDP Growth", - "download": "https://dasl.datadescription.com/download/data/3375", - "filename": "OECD-GDP-Growth", - "name": "OECD GDP Growth", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "OECD 
Unemployment", - "download": "https://dasl.datadescription.com/download/data/3376", - "filename": "OECD-Unemployment", - "name": "OECD Unemployment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The price (per barrel) of oil has fluctuated over time. Various attempts to model it are generally not successful. The data include both the inflation-adjusted prices of a barrel of oil from 1968 to 2016 and two prediction models. ", - "download": "https://dasl.datadescription.com/download/data/3377", - "filename": "Oil-prices-2016", - "name": "Oil prices 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Online Shopping", - "download": "https://dasl.datadescription.com/download/data/3384", - "filename": "Online-Shopping", - "name": "Online Shopping", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Sales volume and price of a slice of plain pizza ($) in Baltimore, Dallas, Chicago, and Denver for 156 weeks. How are prices and sales volumes related? Are patterns the same across cities? 
", - "download": "https://dasl.datadescription.com/download/data/3395", - "filename": "Pizza-prices", - "name": "Pizza prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Poverty and Region 2015", - "download": "https://dasl.datadescription.com/download/data/3403", - "filename": "Poverty-and-Region-2015", - "name": "Poverty and Region 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "UBS (one of the largest banks in the world) prepared\na report comparing prices, wages, and other economic conditions in cities around the world for it’s international clients. Some of the variables it measured in 73 cities are Cost of Living, Food Costs, Average Hourly Wage, average number of Working Hours per Year, average number of Vacation Days, hours of work (at the average wage) needed to buy an iPhone, minutes of work needed to buy a Big Mac, and Women’s Clothing Cost.", - "download": "https://dasl.datadescription.com/download/data/3405", - "filename": "Prices-Earnings", - "name": "Prices and Earnings", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The owner of a small organic food\nstore was concerned about her sales of a specialty yogurt\nmanufactured in Greece. As a result of increasing fuel\ncosts, she recently had to increase its price. 
To help boost\nsales, she decided to place the product on a different shelf\n(near eye level for most consumers) and in a location near\nother popular international products. She kept track of\nsales (number of containers sold per week) for six months\nafter she made the change.", - "download": "https://dasl.datadescription.com/download/data/3410", - "filename": "Product-placement", - "name": "Product placement", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A company is producing and marketing\nnew reading activities for elementary school children that\nit believes will improve reading comprehension scores. A\nresearcher randomly assigns third graders to an eight-week\nprogram in which some will use these activities and others\nwill experience traditional teaching methods. At the end of\nthe experiment, both groups take a reading comprehension\nexam. 
Do these results suggest that the new activities\nare better?", - "download": "https://dasl.datadescription.com/download/data/3411", - "filename": "Product-testing", - "name": "Product testing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Productivity 2016", - "download": "https://dasl.datadescription.com/download/data/3409", - "filename": "Productivity-2016", - "name": "Productivity 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "As a class project, students in a large Statistics class collected publicly available information on recent home sales in their hometowns. There are 894 properties. These are not a random sample, but they may be representative of home sales during a short period of time, nationwide. 
Among the variables available is an indication of whether the home was in an urban, suburban, or rural setting.", - "download": "https://dasl.datadescription.com/download/data/3423", - "filename": "Real-Estate", - "name": "Real Estate", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Real estate sample 1200", - "download": "https://dasl.datadescription.com/download/data/3423", - "filename": "Real-estate-sample-1200", - "name": "Real estate sample 1200", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Regular gas 2017", - "download": "https://dasl.datadescription.com/download/data/3426", - "filename": "Regular-gas-2017", - "name": "Regular gas 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Retail trade index", - "download": "https://dasl.datadescription.com/download/data/3427", - "filename": "Retail-trade-index", - "name": "Retail trade index", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A sample from Fortune 500 companies", - "download": "https://dasl.datadescription.com/download/data/3434", - 
"filename": "Sales-profits", - "name": "Sales and profits", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear. ", - "download": "https://dasl.datadescription.com/download/data/3437", - "filename": "Saratoga-house-prices", - "name": "Saratoga house prices", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Prices of homes in Saratoga NY along with facts about them. Good basis for multiple regressions to predict the Price of the house. But several predictors are collinear. ", - "download": "https://dasl.datadescription.com/download/data/3436", - "filename": "Saratoga-houses", - "name": "Saratoga houses", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A group of Statistics students cut ads out of magazines. They were careful to find two ads for each of 10 similar items, one with a sexual image and one without. They arranged the ads in random order and had 39 subjects look at them for one minute. Then they asked the subjects to list as many of the products as they could remember. Their data are shown in the table. 
Is there evidence that the sexual images mattered?", - "download": "https://dasl.datadescription.com/download/data/3444", - "filename": "Sex-sells", - "name": "Sex sells", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Researchers studying how a car’s fuel efficiency (in Miles Per Gallon) varies with its Speed drove a compact car 200 miles at various speeds on a test track. Their data are shown in the table. ", - "download": "https://dasl.datadescription.com/download/data/3454", - "filename": "Slower-is-cheaper", - "name": "Slower is cheaper", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the federal rate on 3-month Treasury bills from 1950 to 1980 and Years Since 1950. ", - "download": "https://dasl.datadescription.com/download/data/3477", - "filename": "TBill-rates-2016", - "name": "TBill rates 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Tiffany was founded in 1837, when Charles Lewis Tiffany opened his first store in downtown Manhattan. Tiffany retails and distributes a selection of Tiffany & Co. brand jewelry at a range of prices. Today, more than 150 Tiffany & Co. stores sell to customers in U.S. and international markets.\nThe dataset holds quarterly sales data from 2005 through the middle of 2017. 
The data are suitable for time series modeling.", - "download": "https://dasl.datadescription.com/download/data/3482", - "filename": "Tiffany", - "name": "Tiffany 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Time on market", - "download": "https://dasl.datadescription.com/download/data/3483", - "filename": "Time-on-market", - "name": "Time on market", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Are people who use tobacco products more likely to consume alcohol? Here are data on household spending (in pounds) taken by the British government on 11 regions in Great Britain. Do tobacco and alcohol spending appear to be related? What questions do you have about these data? What conclusions can you draw? 
", - "download": "https://dasl.datadescription.com/download/data/3485", - "filename": "Tobacco-and-alcohol", - "name": "Tobacco and alcohol", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Daily closing stock prices for Toyota Motor Manufacturing from April 1, 2008, through June 21, 2013 ", - "download": "https://dasl.datadescription.com/download/data/3491", - "filename": "Toyota-stock-prices-2013", - "name": "Toyota stock prices 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "US Unemployment rate from 1/1/2003 to 8/1/17. 
", - "download": "https://dasl.datadescription.com/download/data/3507", - "filename": "Unemployment-2017", - "name": "Unemployment 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Kelly's Blue Book: https://www.kbb.com/cars-for-sale/ accessed on 31 Aug 2017 using zip code 94305 200 mile radius BMW M5", - "download": "https://dasl.datadescription.com/download/data/3508", - "filename": "Used-BMW", - "name": "Used BMW M5 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "How does the age of a used car influence its price? This is a small enough data set to find a model with a calculator. ", - "download": "https://dasl.datadescription.com/download/data/3509", - "filename": "Used-cars", - "name": "Used cars 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The web site www.autotrader.com lists cars for sale. On January 22 2017,\nit listed 55 used Honda Civics for sale by owner. 
From those listings, we extracted the asking price ($), the mileage, and the model year (from which we computed the age of the car at the time the data were collected\nQuestions include how to best predict the price from mileage and age and whether any of the cars is a particularly good buy.\nOne care is a particularly old (1989) car that has relatively low mileage for such an old car. The seller claims it hasn’t been driven for several years. \nIt looks like Price might benefit from re-expression by logs.", - "download": "https://dasl.datadescription.com/download/data/3510", - "filename": "Used-Civics", - "name": "Used Civics 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the gross domestic product (GDP) of the United States in trillions of 2009 dollars and time. ", - "download": "https://dasl.datadescription.com/download/data/3511", - "filename": "USGDP-2016", - "name": "USGDP 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Walmart revenue", - "download": "https://dasl.datadescription.com/download/data/3514", - "filename": "Walmart-revenue", - "name": "Walmart revenue", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Gallup Poll of 1015 U.S. adults on April 9 – 12, 2015. 
Respondents were classified as high income (over $75,000), middle income ($30k–$75k), or low income (less than $30k). Those polled were asked for their views on redistributing U.S. wealth by heavily taxing the rich. Counts are reconstructed from percentages published by Gallup. ", - "download": "https://dasl.datadescription.com/download/data/3518", - "filename": "Wealth-Redistribution", - "name": "Wealth Redistribution", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly sales of Whole Foods Markets from 1995 through 2016. Whole Foods was purchased by Amazon in 2017, so 2016 is the final complete year prior to the merger. The data show a strong seasonal component even though food sales should not be seasonal. ", - "download": "https://dasl.datadescription.com/download/data/3522", - "filename": "Whole-Foods", - "name": "Whole Foods 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Wine production", - "download": "https://dasl.datadescription.com/download/data/3529", - "filename": "Wine-production", - "name": "Wine production", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Quarterly median weekly earnings for U.S. women 25 years of age or older. Data are provided from the first quarter of 2003 through the first quarter of 2013. 
", - "download": "https://dasl.datadescription.com/download/data/3535", - "filename": "Women-earnings", - "name": "Women’s weekly earnings 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Youth Unemployment 2016", - "download": "https://dasl.datadescription.com/download/data/3546", - "filename": "Youth-Unemployment-2016", - "name": "Youth Unemployment 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://jse.amstat.org/datasets/aaup.txt", - "download": "http://jse.amstat.org/datasets/aaup.dat.txt", - "filename": "AAUP", - "name": "AAUP Faculty Salary data", - "number_format": 31, - "remove_quotes": true, - "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset bestbuy.day contains monthly data on computer usage \n(MIPS) and total number of stores from August 1996 to July 2000. \nAdditionally, information on the planned number of stores through \nDecember 2001 is available. These data can be used to compare \ntime-series forecasting with trend and seasonality components and \ncausal forecasting based on simple linear regression. 
The simple \nlinear regression model exhibits unequal error variances, suggesting \na transformation of Y.", - "download": "http://jse.amstat.org/datasets/bestbuy.dat.txt", - "filename": "Best_Buy", - "name": " BestBuy", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dollar amount for a monthly (January 1991 through December 2000) \nhousehold electric bill is presented as a time series. In addition, \npotential explanatory variables are included. Twelve representative \nmonthly values are provided for the average temperature, for \nheating degree days, and for cooling degree days (not for each \nmonth for each year). Additional variables give the family size \neach month and indicate when a new electric meter and new heating \nand cooling equipment was installed. To convert the billing amount \nto estimated power consumption, a tiered rate function (supplied \nin the accompanying Instructor's Manual) and the costs of \nassociated riders (provided here) must be used. Consumption \nestimates resulting from this information are supplied.\t", - "download": "http://jse.amstat.org/datasets/electricbill.dat.txt", - "filename": "electricbill", - "name": " Electric Bill Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data file contains information on 76 single-family homes inEugene, Oregon during 2005. 
This dataset is suitable for a completemultiple linear regression analysis of home price data that coversmany of the usual regression topics, including interaction andpredictor transformations. Whereas realtors use experience and localknowledge to subjectively value a house based on its characteristics(size, amenities, location, etc.) and the prices of similar housesnearby, regression analysis can provide an alternative that moreobjectively models local house prices using these same data.SOURCES:The data were provided by Victoria Whitman, a realtor in Eugene, in2005. The data were used in a case study in Pardoe (2006).", - "download": "http://jse.amstat.org/datasets/homes76.dat.txt", - "filename": "homes76", - "name": " Modeling home prices using realtor data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": " This file contains total US gross box office receipts for 49 movies. 
This data is to accompany the article entitled Movie Data.", - "download": "http://jse.amstat.org/datasets/movietotal.dat.txt", - "filename": "movietotal", - "name": "movietotal", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For 97 countries in the world, data are given for birth rates, death\nrates, infant death rates, life expectancies for males and females, and\nGross National Product.\n\nVARIABLE DESCRIPTIONS:\nColumns\n 1 - 6 Live birth rate per 1,000 of population\n 7 - 14 Death rate per 1,000 of population\n15 - 22 Infant deaths per 1,000 of population under 1 year old\n23 - 30 Life expectancy at birth for males\n31 - 38 Life expectancy at birth for females\n39 - 46 Gross National Product per capita in U.S. dollars \n47 - 52 Country Group\n 1 = Eastern Europe\n 2 = South America and Mexico\n 3 = Western Europe, North America, Japan, Australia, New Zealand\n 4 = Middle East\n 5 = Asia\n 6 = Africa\n53 - 74 Country", - "download": "http://jse.amstat.org/datasets/poverty.dat.txt", - "filename": "poverty", - "name": "The Statistics of Poverty and Inequality ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Some statistics on the fishing fleet and commercial catch, for each year\nbetween 1960 and 1982. The six columns are:\n\n 1. year\n 2. number of vessels registered for fishing\n 3. number of crab caught\n 4. total weight in kilograms of crab caught\n 5. total number of pot-lifts.\n 6. 
wholesale price of king crab in dollars per pound", - "download": "http://lib.stat.cmu.edu/crab/fleet", - "filename": "fleet", - "name": "fishing fleet and commercial catch", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Commercial catch data for 1960-1982, broken out by district. The four columns\nare:\n\n 1. year\n 2. district number (1, 2, 3 or 4)\n 3. total catch as a count\n 4. total catch in kilograms", - "download": "http://lib.stat.cmu.edu/crab/catch", - "filename": "catch", - "name": "Commercial catch data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Economics" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. 
The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) ", - "download": "http://www.statsci.org/data/oz/rugby.txt", - "filename": "rugby", - "name": "Time of Passages of Play in Rugby", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Mark Taylor was Captain of the Australian test cricket team from May 1994 until February 1999. By the middle of 1997, the Australian team has won its 7 consecutive international test series, making Taylor the most successful Australian Captain in history. However his poor batting form from mid 1996 to mid 1997 gave the Australian selectors a dilemma in deciding whether his excellent Captaincy made up for the run of poor scores off his own bat. \nThe data below gives Mark Taylor's test scores from the middle of 1989 to the middle of 1995, a period over which he was batting well. Scores were made in Australia's first or second innings of each match. Sometimes Australia was not required to bat twice, in which case the second innings is marked as missing. 
There are also a number of `not outs'.", - "download": "http://www.statsci.org/data/oz/taylor.txt", - "filename": "taylor_", - "name": "Mark Taylor's Test Cricket Scores", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Sydney-Hobart yacht race starts from Sydney Harbour on Boxing day (December 26) and finishes several days later in Hobart. It is a 630 nautical mile ocean race. The data give the winning times from 1945 to 1993, as they appeared in the Sydney Morning Herald on 24 December, 1994, plus the winning times for 1994 to 1997. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYacht\n\nName of winning yacht\n\nYear\n\nYear\n\nDays\n\nDays unit of winning time\n\nHours\n\nHours unit of winning time\n\nMinutes\n\nMinutes unit of winning time\n\nTime\n\nWinning time in minutes (should match time in Days, Hours and Minutes)\n", - "download": "http://www.statsci.org/data/oz/sydhob.txt", - "filename": "sydhob", - "name": "Sydney to Hobart Yacht Race Winning Times", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Winning heights or distances (inches) for the High Jump, Discus and Long Jump events at the Olympics up to 1996. 
", - "download": "http://www.statsci.org/data/general/olympic.txt", - "filename": "olympic", - "name": "Olympic Records for High Jump, Discus and Long Jump", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the number of medals won by each medal-winning country in the 1992 Summary Olympic Games in Barcelona, Spain, and the 1994 Winter Olympic Games in Lillehammer, Norway. Also given is the population and latitude of each country. Griffiths et al write: \n... the media spent a lot of time discussing the number of medals won by each country's athletes. The implication was that the comparison was of some importance. However, larger countries would be expected to win more medals than smaller countries, simply because of their larger populations. \n... some viewers, especially those from the smaller countries, felt that the number of medals should be standardised to account for the very wide range of populations, and that a per capita number of medals for a country was a fairer comparison. Others felt that this was unfair to the countries with larger populations - that having twice as many people did not lead to twice as many medals. If standardisation is performed adequately, there should be no systematic relationship between the adjusted medal count and population. \nAlso countries further from the equator might be expected to do better in the winter olympics. \nThe data is incomplete in that countries with no medals are not included. These would be mostly smaller population countries. 
", - "download": "http://www.statsci.org/data/oz/medals.txt", - "filename": "medals", - "name": "Olympic Medals", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This data set was assembled by Rowan Todd and Mark McNaughton, two students studying Statistics at QUT in a class taught by Dr Margaret Mackisack. For a class project they decided to investigate the effect on football game attendance of various covariates. They collected data involving Saturday Australian Football League (AFL) matches at the Melbourne Cricket Ground (MCG). They looked only at matches during the normal home and away season (i.e. not including finals). They used statistics from all such games in 1993 and 1994 (nineteen relevant matches in 1993 and twenty-two in 1994). The response variable measured was attendance at the MCG, and after consideration, they came up with the following covariates: \n\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMCG \n\nAttendance at the MCG in 1000's.\n\nTemp \n\nTemperature. The forecast maximum temperature on the day of the match, in whole degrees C, found in The Weekend Australian.\n\nOther\n\nAttendance at other matches in 1000's. The sum of the attendances at other AFL matches in Melbourne and Geelong on the same day as the match in question.\n\nMembers\n\nMembership. The sum of the memberships of the two clubs whose teams were playing the match in question in 1000's.\n\nTop50\n\nNumber of players from the top fifty. 
The number of players in the top 50 in the AFL who happened to be playing in the match in question.\n\nDate\n\nDate of the match in the format dd/mm/yy.\n\nHome\n\nAbbreviation for home team.\n\nAway\n\nAbbreviation for away team.\n", - "download": "http://www.statsci.org/data/oz/afl.txt", - "filename": "afl", - "name": "AFL Crowd Attendance at the MCG", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) 
\n", - "download": "http://www.statsci.org/data/oz/rugby.txt", - "filename": "rugby_", - "name": "Time of Passages of Play in Rugby", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the record-winning times for 35 hill races in Scotland, as reported by Atkinson (1986). The distance travelled and the height climbed in each race is also given. The data contains a known error - Atkinson (1986) reports that the record for Knock Hill (observation 18) should actually be 18 minutes rather than 78 minutes. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRace\n\nName of race\n\nDistance\n\nDistance covered in miles\n\nClimb\n\nElevation climbed during race in feet\n\nTime\n\nRecord time for race in minutes\n", - "download": "http://www.statsci.org/data/general/hills.txt", - "filename": "hills_", - "name": "Scottish Hill Races", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Investigators studied physical characteristics and ability in 13 football punters. Each volunteer punted a football ten times. The investigators recorded the average distance for the ten punts, in feet. They also recorded the average hang time (time the ball is in the air before the receiver catches it) for the ten punts, in seconds. In addition, the investigators recorded five measures of strength and flexibility for each punter: right leg strength (pounds), left leg strength (pounds), right hamstring muscle flexibility (degrees), left hamstring muscle flexibility (degrees), and overall leg strength (foot-pounds). 
From the study \"The relationship between selected physical performance variables and football punting ability\" by the Department of Health, Physical Education and Recreation at the Virginia Polytechnic Institute and State University, 1983. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in feet\n\nHang\n\nTime in air in seconds\n\nR_Strength\n\nRight leg strength in pounds\n\nL_Strength\n\nLeft leg strength in pounds\n\nR_Flexibility\n\nRight leg flexibility in degrees\n\nL_Flexibility\n\nLeft leg flexibility in degrees\n\nO_Strength\n\nOverall leg strength in pounds\n", - "download": "http://www.statsci.org/data/general/punting.txt", - "filename": "punting", - "name": "American Football Punters", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data on 102 male and 100 female athletes collected at the Australian Institute of Sport, courtesy of Richard Telford and Ross Cunningham. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSport\n\nSport\n\nSex\n\nmale or female\n\nHt\n\nHeight in cm\n\nWt\n\nWeight in kg\n\nLBM\n\nLean body mass\n\nRCC\n\nRed cell count\n\nWCC\n\nWhite cell count\n\nHc\n\nHematocrit\n\nHg\n\nHemoglobin\n\nFerr\n\nPlasma ferritin concentration\n\nBMI\n\nBody mass index = weight/height^2\n\nSSF\n\nSum of skin folds\n\n%Bfat\n\n% body fat\n\n\n\n", - "download": "http://www.statsci.org/data/oz/ais.txt", - "filename": "ais_", - "name": "Australian Institute of Sport", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data was collected by Grant Elliott, a statistics student at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. Here is his description of the data and its collection: \nLiving at a squash court spurred on the idea of this experiment. Frustrated playing squash one night, I thought that the squash ball I was playing with seemed to bounce and react differently to what I was previously used to. So I conducted this experiment on the squash ball, looking at the type of ball, temperature of the ball and the age of the ball. \nBall type: In this experiment I used a 'yellow dot' squash ball and a 'double x' squash ball. A 'yellow dot' is super slow and a 'double x' is termed extra super slow. \nTemperature: When playing with a squash ball it tends to heat up. So I took it to extremes where I had 'room temperature' and 'playing temperature'. To duplicate 'playing temperature' the ball was placed in a cup of boiling water for 45 sec. \nAge: I expected age to be my most significant factor. Squash balls, being a sealed ball, shouldn't vary when they get older, so I used a new ball and compared it to an old ball. 
\nProcedure: I first thought of dropping the balls from a set height and seeing how far they bounced against a tape measure. This idea was scrapped as too much error came into it because you couldn't accurately measure when the maximum height of the bounce was. I then thought of a ball machine. I set the ball machine up and measured how far back did the ball come off the front wall when shot out of the ball machine. This eliminated a lot of varying in my figures as the ball machine shoots the balls out at roughly the same speed and trajectory. It doesn't take all the varying out as I wouldn't know whether the ball machine does shoot it out at exactly the same speed, but it keeps variation to a minimum. \nCriticism: Measuring the distance from the wall was done by my friend and I. We both would watch from different angles and would see where the ball landed. This means our figures are probably out by a couple of centimetres. When the balls were dropped into the water I forgot to take some of them out after 45 sec. Also with some I moved them around in the water to get the heat distributed evenly but others I forgot to move as I was collecting and organising the next ball. Another criticism is the temperature of the water. I put new boiling water into the cup after 4 balls had been in it. Therefore the last ball to go in wouldn't be the same temperature as the first ball.", - "download": "http://www.statsci.org/data/oz/squash.txt", - "filename": "squash", - "name": "Squash Ball Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dynamic and repetitive nature of running means that runners are particularly prone to over-use injuries such as lower back pain. 
Chronic pain is often caused by muscle imbalances, which result in faulty alignment of and abnormal stresses applied to the spinal column. Muscle imbalances originate as adapations in motor control due to pain or external stimuli, and are then reinforced and preserved by repetition. \nThis study, conducted by Physiotherapy student Andrew Mooney, examined the flexibility of four major muscle groups associated with movement of the hip, with particular attention to imbalances between the left and right sides or between the dominant and non-dominant sides. \nA total of 33 male subjects were included in the study. The subjects were divided into three groups: 11 runners with low back pain, 11 runners without low back pain and 11 sedentary individuals without low back pain. (Runners were recruited from the Ashgrove and Toowong athletics clubs, non runners from the University of Queensland and the general community. Runners with lower back pain were recruited first. Once this subject group was tested, subjects for the two control groups were recruited to match the runners with low back pain according to age, height and weight.) \nThe muscle groups examined were \nthe iliopsoas, \nthe rectus femoris, \nthe tensor fascia lata/iliotibial band (ITB/TFL), and \nthe hamstrings \nFor each muscle group, two measures of flexibility were used. The first, relative flexibility, was related to the range of movement of the joint before postural compensations occurred, and the second was a measure of the maximal functional length of the muscle. Relative flexibility and functional length were measured for each muscle group on both the left and right sides of the body. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject number, 1 - 33\n\nGroup\n\nPain, NoPain or Sedentary\n\nMatch\n\n1 - 11, indicating matched triples\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight of subject in cm\n\nWeight\n\nWeight of subject in kg\n\nDistance\n\nType of running event: Sprint, middle distance (Mid) or long distance (Long)\n\nYears\n\nNumber of years running\n\nDominant\n\nDominant side, Left or Right\n\nDF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on dominant side in degrees. Positive numbers indicate above the horizontal, negative numbers below the horizontal.\n\nDF.Rectus\n\nRelative flexibility of rectus femoris muscle on dominant side in degrees\n\nDF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on dominant side in degrees\n\nDF.Hamstring\n\nRelative flexibility of hamstring muscles on dominant side in degrees\n\nDL.Iliopsoas\n\nFunctional length of iliopsoas muscle on dominant side\n\nDL.Rectus\n\nFunctional length of rectus femoris muscle on dominant side\n\nDL.ITBTFL\n\nFunctional length of ITB/TFL muscle on dominant side\n\nDL.Hamstring\n\nFunctional length of hamstring muscles on dominant side\n\nNF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on nondominant side in degrees\n\nNF.Rectus\n\nRelative flexibility of rectus femoris muscle on nondominant side in degrees\n\nNF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on nondominant side in degrees\n\nNF.Hamstring\n\nRelative flexibility of hamstring muscles on nondominant side in degrees\n\nNL.Iliopsoas\n\nFunctional length of iliopsoas muscle on nondominant side\n\nNL.Rectus\n\nFunctional length of rectus femoris muscle on nondominant side\n\nNL.ITBTFL\n\nFunctional length of ITB/TFL muscle on nondominant side\n\nNL.Hamstring\n\nFunctional length of hamstring muscles on nondominant side\n", - "download": "http://www.statsci.org/data/oz/backpain.txt", - "filename": "backpain", - "name": "Runners with Low Back Pain", - "number_format": 31, - 
"remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data come from the 1990 Pilot Surf/Health Study of NSW Water Board. The first column takes values 1 or 2 according to the recruit's perception of whether (s)he is a Frequent OCean Swimmer, the second column has values 1 or 4 according to recruit's usually chosen swimming location (1 for non-beach, 4 for beach), the third column has values 2 (aged 15-19), 3 (aged 20-25), or 4 (aged 25-29), the fourth column has values 1 (male) or 2 (female) and finally, the fifth column has the number of self-diagnosed ear infections that were reported by the recruit.", - "download": "http://www.statsci.org/data/oz/earinf.txt", - "filename": "earinf", - "name": "Ear Infections in Swimmers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": " Alex Rodriguez (known to fans as A-Rod)was the youngest player ever to hit 500 home runs. The file holds the number of home runs hit by A-Rod during the 1994–2016 seasons. Describe the distribution, mentioning its shape and any unusual features. ", - "download": "https://dasl.datadescription.com/download/data/3038", - "filename": "a-rod-2016", - "name": "A-Rod 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In Olympic Archery both men and women start with a field of 64 qualifiers. 
Each archer shoots a round of 72 arrows (total possible score: 720) to establish a seeding position. Then they participate in a single-elimination contest. Thus, the seeding round is the only one that provides data for all archers (because some are […] ", - "download": "https://dasl.datadescription.com/download/data/3056", - "filename": "Archery", - "name": "Archery", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "American League baseball games are played under the designated hitter rule, meaning that pitchers, often weak hitters, do not come to bat. Baseball owners believe that the designated hitter rule means more runs scored, which in turn means higher attendance. Is there evidence that more fans attend games if the teams score more runs? The […] ", - "download": "https://dasl.datadescription.com/download/data/3057", - "filename": "Attendance-2016", - "name": "Attendance 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "It has been suggested that children born in the summer have an advantage over their peers when it comes to sports, perhaps because they can be outdoors when they are young. The data report the number of professional ballplayers born in each month of the year for one season of professional baseball. 
", - "download": "https://dasl.datadescription.com/download/data/3060", - "filename": "Ballplayer-births", - "name": "Ballplayer births", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3063", - "filename": "Baseball-attendance", - "name": "Baseball attendance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3064", - "filename": "Baseball-circumferences", - "name": "Baseball circumferences", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Ballplayers have been signing ever larger contracts. The highest salaries (in millions of dollars per season) for each year since 1874 are in the data file. 
", - "download": "https://dasl.datadescription.com/download/data/3065", - "filename": "Baseball-salaries-2015", - "name": "Baseball salaries 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3066", - "filename": "Baseball-salaries-2016", - "name": "Baseball salaries 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3067", - "filename": "Baseball-weights", - "name": "Baseball weights", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3069", - "filename": "Basketball-shots", - "name": "Basketball shots", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A company that makes basketballs has the motto: “Our basketballs are ready to play.” Therefore, it is important to the company that the basketballs are inflated with the proper amount of air when shipped. 
Most basketballs are inflated to 7 to 9 pounds per square inch. Recently the company selected a random basketball from its […] ", - "download": "https://dasl.datadescription.com/download/data/3068", - "filename": "Basketballs", - "name": "Basketballs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Belmont Stakes is the last and longest of the three horse races that make up the Triple Crown. Curiously, in some of the Belmont races horses have run clockwise around the track, and in others they have run counterclockwise. Do the horses care? But note that the length of the race has also not […] ", - "download": "https://dasl.datadescription.com/download/data/3072", - "filename": "Belmont-stakes-2015", - "name": "Belmont stakes 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3151", - "filename": "Cyclists-2015", - "name": "Cyclists 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3154", - "filename": "Darts", - "name": "Darts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { 
- "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In the National league all players take a turn at bat. But in the American league, a “designated hitter” usually bats for the pitcher, who is likely not to be a strong batter. The theory is that a designated hitter will lead to more hits, more runs, and a higher-scoring game. The data give the average runs per game and total home runs for major league baseball teams during the 2012 season. Is there a discernible difference between the leagues?", - "download": "https://dasl.datadescription.com/download/data/3159", - "filename": "Designated-hitter-2012", - "name": "Designated hitter 2012", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Motorcycles designed to run off-road, often known as dirt bikes, are specialized\nvehicles. The dataset holds data on 114 many attributes of dirt bikes.\nSome cost as little as\n$1399, while others are substantially more expensive. One interest is in building a model to predict the price of a dirt bike from attributes of the bikes.", - "download": "https://dasl.datadescription.com/download/data/3166", - "filename": "Dirt-bikes", - "name": "Dirt bikes 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A leading manufacturer of exercise\nequipment wanted to collect data on the effectiveness of\ntheir equipment. 
An August 2001 article in the journal\nMedicine and Science in Sports and Exercise compared how\nlong it would take men and women to burn 200 calories\nduring light or heavy workouts on various kinds of exercise\nequipment. The results summarized in the table are the average\ntimes for a group of physically active young men and\nwomen whose performances were measured on a representative\nsample of exercise equipment.", - "download": "https://dasl.datadescription.com/download/data/3195", - "filename": "Exercise-equipment", - "name": "Exercise equipment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Football owners are constantly in competition for good players. The more wins, the more likely that the team will provide good business returns for the owners. The resources that each of the 32 teams has in the National Football League (NFL) vary, but the draft system is designed to counteract the advantages that wealthier teams may have.", - "download": "https://dasl.datadescription.com/download/data/3214", - "filename": "Football-salaries-2017", - "name": "Football salaries 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student performed an experiment with three different grips to see what effect it might have on the distance of a backhanded Frisbee throw. She tried it with her normal grip, with one finger out, and with the Frisbee inverted. She measured in paces how far her throws went. 
", - "download": "https://dasl.datadescription.com/download/data/3221", - "filename": "Frisbee-throws", - "name": "Frisbee throws", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Golf courses", - "download": "https://dasl.datadescription.com/download/data/3245", - "filename": "Golf-courses", - "name": "Golf courses", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The average drive distance (in yards) for 199 professional golfers during a week on the men’s PGA tour in 2015.", - "download": "https://dasl.datadescription.com/download/data/3246", - "filename": "Golf-drives-2015", - "name": "Golf drives 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Golfers 2017", - "download": "https://dasl.datadescription.com/download/data/3244", - "filename": "Golfers-2017", - "name": "Golfers 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. 
Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", - "download": "https://dasl.datadescription.com/download/data/3288", - "filename": "Indy-2016", - "name": "Indy 500 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", - "download": "https://dasl.datadescription.com/download/data/3289", - "filename": "Indy-2017", - "name": "Indy 500 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The 2.5-mile Indianapolis Motor Speedway has\nbeen the home to a race on Memorial Day nearly every year\nsince 1911. 
Even during the first race, there were controversies.\nRalph Mulford was given the checkered flag first but took three\nextra laps just to make sure he’d completed 500 miles. When he\nfinished, another driver, Ray Harroun, was being presented with\nthe winner’s trophy, and Mulford’s protests were ignored. Harroun\naveraged 74.6 mph for the 500 miles. In 2013, the winner,\nTony Kanaan, averaged over 187 mph, beating the previous record\nby over 17 mph!", - "download": "https://dasl.datadescription.com/download/data/3290", - "filename": "Indy-2018", - "name": "Indy 500 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. (It has been run in May every year but\none—1901—when it took place on April 29.)", - "download": "https://dasl.datadescription.com/download/data/3305", - "filename": "Kentucky-Derby-2016", - "name": "Kentucky Derby 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. 
(It has been run in May every year but\none—1901—when it took place on April 29.)", - "download": "https://dasl.datadescription.com/download/data/3306", - "filename": "Kentucky-Derby-2017", - "name": "Kentucky Derby 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Kentucky Derby is a horse race\nthat has been run every year since 1875 at Churchill Downs\nin Louisville, Kentucky. The race started as a 1.5-mile race,\nbut in 1896, it was shortened to 1.25 miles because experts\nfelt that 3-year-old horses shouldn’t run such a long race that\nearly in the season. (It has been run in May every year but\none—1901—when it took place on April 29.)", - "download": "https://dasl.datadescription.com/download/data/3307", - "filename": "Kentucky-Derby-2018", - "name": "Kentucky Derby 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "NY Marathon 2016", - "download": "https://dasl.datadescription.com/download/data/3370", - "filename": "NY-Marathon-2016", - "name": "NY Marathon 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "How are Olympic performances in various events related? 
The data gives winning long-jump and high-jump distances in meters, for the Summer Olympics from 1912 through 2016 ", - "download": "https://dasl.datadescription.com/download/data/3382", - "filename": "Olympic-jumps-2016", - "name": "Olympic jumps 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "NFL data from the 2015 football season reported the number of yards gained by each of the league’s 488 receivers ", - "download": "https://dasl.datadescription.com/download/data/3425", - "filename": "Receivers-2015", - "name": "Receivers 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Times (in minutes) for one runner to run 4 miles on various courses during a 10-year period. ", - "download": "https://dasl.datadescription.com/download/data/3433", - "filename": "Run-times", - "name": "Run times", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Hill races are races that climb generally steep hills, held throughout Scotland throughout the year. The file holds records for men and women in these races the last time those were posted in an accessible table along with facts about the races. In particular, we know the length(km) and total climb(m). These are two independent predictors of the record times. 
Sex of the runner can be an additional indicator variable.", - "download": "https://dasl.datadescription.com/download/data/3440", - "filename": "Scottish-Hill-Races", - "name": "Scottish Hill Races", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A college hockey coach collected data from the 2016–2017 National Hockey League season. He hopes to convince his players that the number of shots taken has an effect on the number of goals scored. The data includes both offensive and defensive players. ", - "download": "https://dasl.datadescription.com/download/data/3448", - "filename": "Shoot-to-Score-2016", - "name": "Shoot to Score 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Bjork Larsen was trying to decide whether to use a\nnew racing wax for cross-country skis. He decided that the\nwax would be worth the price if he could average less than\n55 seconds on a course he knew well, so he planned to study\nthe wax by racing on the course 8 times. The data report his race times. 
\n", - "download": "https://dasl.datadescription.com/download/data/3450", - "filename": "Ski-wax", - "name": "Ski wax", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Men’s Giant Slalom skiing event consists of two runs whose times are added together for a final score. The data give the giant slalom times in the 2014 Winter Olympics at Sochi. ", - "download": "https://dasl.datadescription.com/download/data/3451", - "filename": "Slalom-times-2014", - "name": "Slalom times 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Men’s Giant Slalom skiing event consists of two runs whose times are added together for a final score. The data give the giant slalom times in the 2018 Winter Olympics at PyeongChang. ", - "download": "https://dasl.datadescription.com/download/data/3452", - "filename": "Slalom-times-2018", - "name": "Slalom times 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Advertisements for an instructional video claim that the techniques will improve the ability of Little League pitchers to throw strikes and that, after undergoing the training, players will be able to throw strikes on at least 60% of their pitches. To test this claim, we have 20 Little Leaguers throw 50 pitches each, and we record the number of strikes. 
After the players participate in the training program, we repeat the test. The table shows the number of strikes each player threw before and after the training.", - "download": "https://dasl.datadescription.com/download/data/3464", - "filename": "Strikes", - "name": "Strikes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fifty nine countries won gold medals in the 2016 Summer Olympics. The dataset lists them, along with the total number of gold medals each won. It can be a challenge to find a good display for data like these. ", - "download": "https://dasl.datadescription.com/download/data/3468", - "filename": "Summer-Olympics-2016", - "name": "Summer Olympics 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Super Bowl 2016", - "download": "https://dasl.datadescription.com/download/data/3470", - "filename": "Super-Bowl-2016", - "name": "Super Bowl 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Swim and Run", - "download": "https://dasl.datadescription.com/download/data/3577", - "filename": "Swim-Run", - "name": "Swim and Run", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": 
"yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "People swim across Lake Ontario from Niagara on the Lake to Toronto–a distance of 52 km (32.3 miles). Because the lake is fresh water, this swim is considered more difficult than ocean swims of similar length because salt water provides more buoyancy than fresh water. (For comparison, the English Channel is 21 miles across and, despite strong currents, generally takes less time to cross.)", - "download": "https://dasl.datadescription.com/download/data/3473", - "filename": "Swim-lake", - "name": "Swim the lake 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Unlike track events, swimming heats are not determined at random. Instead, swimmers are seeded so that better swimmers are placed in later heats. Here are the times (in seconds) for the women’s 400-m freestyle for two heats in the 2016 Olympics. 
", - "download": "https://dasl.datadescription.com/download/data/3471", - "filename": "Swimming-heats", - "name": "Swimming heats 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Swimming heats London", - "download": "https://dasl.datadescription.com/download/data/3472", - "filename": "Swimming-heats-London", - "name": "Swimming heats London", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Tour de France is the most famous bicycle race in the world. It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. Lance Armstrong’s 7 consecutive victories have been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", - "download": "https://dasl.datadescription.com/download/data/3489", - "filename": "Tour-de-France-2016", - "name": "Tour de France 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Tour de France is the most famous bicycle race in the world. It has been run every year since 1903, except for a few during wars. The data report facts about the winners including age, time, distance, and average speed. 
Lance Armstrong’s 7 consecutive victories have been disqualified due to the use of performance-enhancing drugs, but his statistics are still included here.", - "download": "https://dasl.datadescription.com/download/data/3490", - "filename": "Tour-de-France-2017", - "name": "Tour de France 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Gallup poll asked 1008 Americans age 18 and over whether they planned to watch the upcoming Super Bowl. The pollster also asked those who planned to watch whether they were looking forward more to seeing the football game or the commercials. ", - "download": "https://dasl.datadescription.com/download/data/3516", - "filename": "Watch-Super-bowl", - "name": "Watch the Super bowl", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The world men’s weightlifting records are categorized by weight class of the competitors. How does the weight class relate to the record? 
", - "download": "https://dasl.datadescription.com/download/data/3520", - "filename": "Weightlifting-2016", - "name": "Weightlifting 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Boston Marathon has had a wheelchair division since 1977.\nWho do you think\nis typically faster, the men’s marathon winner on foot\nor the women’s wheelchair marathon winner? Because\nthe conditions differ from year to year, and speeds have\nimproved over the years, it seems best to treat these as\npaired measurements. Here are summary statistics for\nthe pairwise differences in finishing time (in minutes):", - "download": "https://dasl.datadescription.com/download/data/3521", - "filename": "Wheelchair-Marathon", - "name": "Wheelchair Marathon 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Sears Cup was established in 1993\nto honor institutions that maintain a broad-based athletic\nprogram, achieving success in many sports, both men’s and\nwomen’s. In the years following its Division III inception in\n1995, the cup was won by Williams College 15 of 17 years.\nWhy did the football team win so much? Was it because\nthey were heavier than their opponents? 
The data gives the\naverage team weights for selected years from 1973 to 1993.", - "download": "https://dasl.datadescription.com/download/data/3525", - "filename": "Williams-football", - "name": "Williams football", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The times from the first race of the women’s 2 X 500-m speed skating times at the 2010 Winter Olympics in Vancouver, B.C. are given. ", - "download": "https://dasl.datadescription.com/download/data/3530", - "filename": "speed-skating", - "name": "Winter Olympics 2010 speed skating", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Source: https://www.olympic.org/sochi-2014/alpine-skiing/slalom-men", - "download": "https://dasl.datadescription.com/download/data/3531", - "filename": "Winter-Olympics-2014", - "name": "Winter Olympics 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "53 men completed the men’s alpine downhill. The gold medal winner finished in 100.25 seconds. Here are the times (in seconds) for all competitors. 
", - "download": "https://dasl.datadescription.com/download/data/3532", - "filename": "olympics-downhill", - "name": "Winter olympics 2018 downhill", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The women’s heptathlon in the Olympics consists of seven track-and-field events: the 200 m and 800 m runs, 100 m high hurdles, shot put, javelin, high jump, and long jump. Each contestant is awarded points for each event based on her performance. So, which performance deserves more points? It’s not clear how to compare them. They aren’t measured in the same units, or even in the same direction (longer jumps are better but shorter times are better.)", - "download": "https://dasl.datadescription.com/download/data/3536", - "filename": "Womens-Heptathlon", - "name": "Womens Heptathlon 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Women’s 500 metres in short track speed skating at the 2018 Winter Olympics took place from 10 to 13 February 2018 at the Gangneung Ice Arena in Gangneung, South Korea. The defending champion from 2014, Li Jianrou, had retired, but the 2014 silver medalist Arianna Fontana competed and eventually won the event. 
", - "download": "https://dasl.datadescription.com/download/data/3537", - "filename": "Womens-short-track", - "name": "Womens short track 2018", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The women’s 1500 metres speed skating competition for the 2006 Winter Olympics was held in Turin, Italy, on 22 February ", - "download": "https://dasl.datadescription.com/download/data/3538", - "filename": "Womens-speed-skating", - "name": "Womens speed skating 2006", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://jse.amstat.org/datasets/ballbearings.txt", - "download": "http://jse.amstat.org/datasets/ballbearings.dat.txt", - "filename": "ballbearings", - "name": "Ball Bearing Reliability Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://jse.amstat.org/datasets/baseball.txt", - "download": "http://jse.amstat.org/datasets/baseball.dat.txt", - "filename": "baseball_", - "name": "baseball", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This data set contains every NCAA Basketball Tournament 
game \never played. The tournament has been held every year since 1939.", - "download": "http://jse.amstat.org/datasets/basketball.dat.txt", - "filename": "Basketball", - "name": "NCAA Basketball Tournament Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data are provided for Barry Bonds' plate appearances in the 2001\nbaseball season. Variables include characteristics of the innings\nbefore the first pitch to Bonds (e.g., the number of outs, the number\nof runners on each base, the score, the opposing pitcher's earned run\naverage) and after the first pitch to Bonds (e.g., the outcome of the\nappearance, how many runs scored in the inning after Bonds hits).", - "download": "http://jse.amstat.org/datasets/bonds2001.dat.txt", - "filename": "Bonds", - "name": "Barry Bonds' 2001 Plate Appearances", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset contains the scores, opponents, and sites of the 18 Big Ten\nmen's basketball games that involved the University of Iowa in 1997.", - "download": "http://jse.amstat.org/datasets/hawks.dat.txt", - "filename": "hawks", - "name": " 1997 University of Iowa Big Ten Basketball Data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset consists of game-by-game information for the 
1998 season\nfor Mark McGwire and the St. Louis Cardinals, and Sammy Sosa and the\nChicago Cubs. The dataset includes information on the home run hitting\nof these two players, as well as game results for the teams.", - "download": "http://jse.amstat.org/datasets/homerun.dat.txt", - "filename": "homerun", - "name": "The 1998 Home Run Race Between Mark McGwire and Sammy Sosa", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data are from The Baseball Encyclopedia (1993) and Total Baseball (2001). \nThey include the location, league affiliation (National or American), \ndivision affiliation (East, Central, or West), season of play, home game \nattendance, runs scored, runs allowed, wins, losses, and number of games \nbehind the division leader for each major league franchise for the 1969 \nthrough 2000 seasons. Other data (including opening dates for new stadia, \nand dates of work stoppages) were collected from Ballparks by Munsey and \nSuppes (2001) and InfoPlease (2001).", - "download": "http://jse.amstat.org/datasets/MLBattend.dat.txt", - "filename": "MLBattend", - "name": "1969-2000 Major League Baseball Attendance data", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Driver results for all NASCAR races between 1975 and 2003, inclusive. 
The dataset constitutes all \nparticipants in each of 898 races, and includes their start/finish postions, prize winnings, car \nmake and laps completed.", - "download": "http://jse.amstat.org/datasets/nascard.dat.txt", - "filename": "nascard", - "name": "NASCAR Driver Results", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Race results for all NASCAR Winston Cup races between 1975 and 2003, inclusive. The dataset \nincludes the numbers of cars, total prize winnings, monthly consumer price index for the month \nof the race, track length, laps completed by the winner, spatial co-ordinates and name of track.", - "download": "http://jse.amstat.org/datasets/nascarr.dat.txt", - "filename": "nascarr", - "name": "NASCAR Race Results", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This data set contains performance statistics for National \nFootball League (NFL) teams for their 2000 regular season.\n Columns Variable Description\n 1 - 3 initials team initials\n 5 - 26 team name and location of the team\n 28 - 29 wins wins\n 31 - 32 losses losses\n 34 - 35 homedrives50 drives begun in opponents' territory\n 37 - 38 homedrives20 drives begun within 20 yards of the goal\n 40 - 41 oppdrives50 opponents drives begun in team's territory\n 43 oppdrives20 opponents drives begun within 20 yards of goal\n 45 homepuntblock punts blocked by team\n 47 opppuntblock punts team had blocked\n 49 - 50 hometouch touchdowns scored by team\n 52 - 53 opptouch touchdowns scored against team\n 55 - 58 
homeyards total yardage gained by offense\n 60 - 63 oppyards total yardage allowed by defense\n 65 - 68 hometop time of possession by offense (in minutes)\n 70 - 73 opptop time of possession by opponents' offense\n 75 - 76 homefgm field goals made\n 78 - 79 oppfgm field goals allowed to opponents\n 81 - 82 homefga field goals attempted\n 84 - 85 oppfga field goals attempted by opponents\n 87 - 89 opppuntno punts made by opponents\n 91 - 94 opppuntave average length of punts made by opponents\n 96 - 99 opppuntnet average change in field position \n during opponents' punts\n101 - 102 opppunttb opponents' punts taken for touchbacks\n104 - 105 opppunt20 opponents' punts that resulted in the team's\n offense beginning within 20 yards of their \n own (defensive) goal line\n107 - 108 opppuntlong longest opponents' punt\n110 - 112 homepuntno punts made by team\n114 - 117 homepuntave average length of punts made by team\n119 - 122 homepuntnet average change in field position \n during team's punts\n124 - 125 homepunttb team's punts taken for touchbacks\n127 - 128 homepunt20 team's punts that resulted in the opponents'\n offense beginning within 20 yards of their \n own (defensive) goal line\n130 - 131 homepuntlong longest team punt\n133 - 135 home1sts first downs obtained by offense\n137 - 139 opp1sts first downs allowed by defense\n141 - 142 homesacks sacks achieved by team's defense\n144 - 145 oppsacks sacks allowed by team's offense\n147 - 148 homekos kickoffs made by team\n150 - 151 oppkos kickoffs received by team\n153 - 156 homekoyds yards gained during kickoff returns\n158 - 161 oppkoyds yards allowed to opposition during kickoff returns\n163 - 166 homekoave average yards gained during kickoff returns\n168 - 171 oppkoave average yards allowed during kickoff returns\n173 - 175 homekolong longest kickoff return made by team\n177 - 179 oppkolong longest kickoff return allowed by team\n181 homekotds kickoffs returned for a touchdown by team\n183 oppkotds kickoffs 
returned for touchdown by opposition\n185 - 186 homerets punts returned by team\n188 - 189 opprets punts returned by opposition\n191 - 192 homefc punts \"fair caught\" by team\n194 - 195 oppfc punts \"fair caught\" by opposition\n197 - 199 homeretyds return yardage on punts by team\n201 - 203 oppretyds return yardage on punts by opposition\n205 - 208 homeretave average length of punt returns by team\n210 - 213 oppretave average length of punt returns by opposition\n215 homerettds punts returned by team for a touchdown\n217 opprettds punts returned by opponents for a touchdown\n219 - 220 homeint interceptions made by team's defense\n222 - 223 oppint interceptions made against team's offense\n225 - 226 homerecover fumbles recovered by team's defense\n228 - 229 opprecover fumbles recovered by opposing defenses\n231 - 232 numgames games played by team\n234 - 237 opprateyds average number of yards gained \n per minute of possession by opponents\n239 - 242 homerateyds average number of yards gained \n per minute of possession by team\n244 - 247 opppuntrate average number of punts \n per minute of possession by opponents\n249 - 252 homepuntrate average number of punts \n per minute of possession by team\n254 - 258 oppratetd average number of touchdowns \n per minute of possession by opponents\n260 - 264 homeratetd average number of touchdowns \n per minute of possession by team\n266 - 269 winpercent winning percentage\n271 - 275 hometorate turnovers obtained by team,\n per minute of possession by opponents\n277 - 281 opptorate turnovers allowed by team, \n per minute of possession\n283 - 286 home1rate first downs obtained by team, \n per minute of possession\n288 - 291 opp1rate first downs allowed by team's defense, \n per minute of possession by opposition\n293 - 295 homepoints points scored by team\n297 - 299 opppoints points scored against team\n301 - 303 conference conference to which the team belongs (AFC or NFC)", - "download": 
"http://jse.amstat.org/datasets/nfl2000.dat.txt", - "filename": "nfl2000", - "name": "NFL Y2K PCA", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", - "download": "http://jse.amstat.org/datasets/nfl93.dat.txt", - "filename": "nfl93", - "name": "NFL Scores and Pointspreads", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", - "download": "http://jse.amstat.org/datasets/nfl94.dat.txt", - "filename": "nfl94", - "name": "NFL Scores and Pointspreads", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. 
In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", - "download": "http://jse.amstat.org/datasets/nfl95.dat.txt", - "filename": "nfl95", - "name": "NFL Scores and Pointspreads", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.", - "download": "http://jse.amstat.org/datasets/nfl96.dat.txt", - "filename": "nfl96", - "name": " NFL Scores and Pointspreads", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset contains scores for all regular season National Football\nLeague games from the 1998, 1999 and 2000 seasons. 
In addition to \nthe points scored by the home and visiting teams in each game, the\ndataset contains a pointspread that handicaps each game.\n\nColumns \n 1 - 4 Year (1998, 1999, or 2000)\n 6 - 7 Week of the season (1 to 17)\n10 - 27 Home team name\n29 - 30 Home team score\n33 - 50 Visiting team name\n52 - 53 Visiting team score\n56 - 60 Pointspread ", - "download": "http://jse.amstat.org/datasets/nfl98-00.dat.txt", - "filename": "nfl98-00", - "name": " NFL Scores for 1998-2000", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": " The data set provides the weights (in lbs)\nof the 26 men on the 1996 US Olympic Rowing Team in Atlanta. The\ndata includes the names of the participants and which event they\nrowed in. The US team participated in 7 of the 8 possible events.\nThis data set is useful for discussing outliers,\nexplanations for outliers, and comparing the robustness of the\nmean and the median.\n\n", - "download": "http://jse.amstat.org/datasets/rowing.dat.txt", - "filename": "rowing", - "name": " Weights of 1996 US Olympic Rowing Team", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data consist of 500-yard freestyle swim times for male and female swimmers age 50-94 in a biennial national competition. 
Variables include year, gender, age, age group, swim time, seed time (qualifying time from state competition), and split times (in each 50-yard segment).", - "download": "http://jse.amstat.org/v22n1/doane/SeniorSwimTimes-DataSet.txt", - "filename": "SeniorSwimTimes", - "name": "SeniorSwimTimes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data are provided for the 56 Tour De France bicycle races since World \nWar II. The year and dates of the event, the total number of stages, \nthe total distance, the winning total time and average speed, the name \nand country of the winner, the birth date of the winner, and the \nwinner's age at the time of victory are the variables in the dataset.", - "download": "http://jse.amstat.org/datasets/tdf.dat.txt", - "filename": "tdf", - "name": "Tour De France Winners (Can Lance Win Six?)", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - } - ], - "subcategory_name": "Sport" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "After purchasing a batch of flight helmets that did not fit the heads of many pilots, the NZ Airforce decided to mesure the headsizes of all recruits. Before this was carried out, information was collected to determine the feasibility of using cheap cardboard callipers to make the measurements, instead of metal ones which were expensive and uncomfortable. The data lists the head diameters of 18 recruits measured once using cardboard callipers and again using metal callipers. 
One question is whether there is any systematic difference between the two sets of callipers. One might also ask whether there is more variability in the cardboard callipers measurement than that of the metal callipers. ", - "download": "http://www.statsci.org/data/oz/nzhelmet.txt", - "filename": "nzhelmet", - "name": "Helmet Sizes for New Zealand Airforce", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data were collected as part of a project for the Federal Office for Road Safety conducted by the Research Institute of Gender and Health at the University of Newcastle. There is evidence that women drivers who are involved in motor vehicle accidents are more likely than men to be injured. A possible reason is that women often drive smaller cars that provide less protection in a collision. One of the aims of the project was to examine preferences for cars among men and women and investigate the extent to which safety was a factor in determining preferences. \nThe survey was conducted by research assistants who asked people in car parks to participate and administered a structured questionnaire. They were instructed to obtain data from men and women with small, medium and large cars, with 50 people per group for a total of 300 respondents. (The sample size was based on power requirements for another part of the survey that involved anthropometric measurements.) The research assistants approached people in car parks of the University of Newcastle and nearby shopping centres during December 1997 and January 1998. \nThe data consist of 300 records each with 22 variables. 
The variables are: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nID\n\nIdentification number of respondent\n\nAge\n\nAge of respondent (years)\n\nSex\n\n1=female, 2=male\n\nLicYr\n\nTime they have held a full driving licence, in years and months (years)\n\nLicMth\n\nTime they have held a full driving licence, in years and months (months)\n\nActCar\n\nMake, model and year of car most often driven, coded to size of car 1=small, 2=medium, 3=large\n\nKids5\n\nChildren under five, 1=yes, 2=no\n\nKids6\n\nChildren 6 to 16, 1=yes, 2=no\n\nPrefCar\n\nPreferred car, coded to size of car 1=small, 2=medium, 3=large\n\nCar15k\n\nPreferred type of car if cost $15000, 1=small new car; 2=large second-hand car\n\nReason\n\n1=safety, 2=reliability, 3=cost, 4=performance, 5=comfort, 6=looks\n\nCost\n\nHow important is cost when buying a car? 1=not important, 2=little importance, 3=important, 4=very important\n\nReliable\n\nHow important is reliability ...?\n\nPerform\n\nHow important is performance ...?\n\nFuel\n\nHow important is fuel consumption ...?\n\nSafety\n\nHow important is safety ...?\n\nAC/PS\n\nHow important is air conditioning/power steering ...?\n\nPark\n\nHow important is ease of parking ...?\n \nRoom\n \nHow important is space/roominess ...?\n \nDoors\n \nHow important is the number of doors ...?\n \nPrestige\n \nHow important is prestige/style ...?\n \nColour\n \nHow important is colour ...?\n", - "download": "http://www.statsci.org/data/oz/carprefs.txt", - "filename": "carprefs", - "name": "Car Preferences", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Do you use up the same amount of the soap in the shower each morning, or does it depend on the size of the bar of soap? 
This data was collected by Rex Boggs of Glenmore State High School in Rockhampton, Queensland. Rex writes: \nI had a hypothesis that the daily weight of my bar of soap in my shower wasn't a linear function, the reason being that the tiny little bar of soap at the end of its life seemed to hang around for just about ever. I wanted to throw it out, but I felt I shouldn't do so until it became unusable. And that seemed to take weeks. \nAlso I had recently bought some digital kitchen scales and felt I needed to use them to justify the cost. I hypothesised that the daily weight of a bar of soap might be dependent upon surface area, and hence would be a quadratic function. \nI kept records for three weeks (the life of the bar), and was amazed to find that the data was linear with a very high R2 value, until the last few days of its life. \nThe data ends at day 22. On day 23 the soap broke into two pieces and one piece went down the plughole ... \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDate \n\nDate of observation\n\nDay \n\nNumber of days since beginning of experiment\n\nWeight \n\nWeight of soap bar (grams)\n\n\n\n", - "download": "http://www.statsci.org/data/oz/soap.txt", - "filename": "soap", - "name": "Bar of Soap", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "These data are for specimens of 50 varieties of timber, for modulus of rigidity, modulus of elasticity and air dried density, arranged in increasing order of magnitude of the density. 
", - "download": "http://www.statsci.org/data/oz/timber.txt", - "filename": "timber", - "name": "Timber Data", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A soft drink bottler is analyzing vending machine service routes in his distribution system. He is interested in predicting the amount of time required by the route driver to service the vending machines in an outlet. This service activity including stocking the machine with beverage products and minor maintenance or housekeeping. The industrial engineer responsible for the study has suggested that the two most important variables affecting the delivery time are the number of cases of product stocked and the distance walked by the route driver. The engineer has collected 25 observations on delivery time (minutes), number of cases and distance walked (feet).", - "download": "http://www.statsci.org/data/general/softdrin.txt", - "filename": "softdrin", - "name": "Soft Drink Delivery Times", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Measurements of weight and tar, nicotine, and carbon monoxide content\nare given for 25 brands of domestic cigarettes.\n\nThe Federal Trade Commission annually rates varieties of domestic\ncigarettes according to their tar, nicotine, and carbon monoxide\ncontent. The United States Surgeon General considers each of these\nsubstances hazardous to a smoker's health. 
Past studies have shown\nthat increases in the tar and nicotine content of a cigarette are\naccompanied by an increase in the carbon monoxide emitted from the\ncigarette smoke.\n\nThe data presented here are taken from Mendenhall and Sincich (1992)\nand are a subset of the data produced by the Federal Trade Commission.\n\nFor more information, see the article \"Using Cigarette Data for an\nIntroduction to Multiple Regression\" by Lauren McIntyre in Volume 2,\nNumber 1, of the _Journal of Statistics Education_. ", - "download": "http://jse.amstat.org/datasets/cigarettes.dat.txt", - "filename": "cigarettes", - "name": "Cigarette data for an introduction to multiple regression", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Impact strength of insulation cuts in foot-pounds. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nLot\n\nLot of insulating material\n\nCut\n\nLengthwise (Length) or crosswise (Cross)\n\nStrength\n\nImpact strength in foot-pounds\n\n\n\n", - "download": "http://www.statsci.org/data/general/insulate.txt", - "filename": "insulate", - "name": "Impact Strength Of Insulation Cuts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data was collected by Stewart Fischer and David Tippetts, statistics students at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. 
Here is their description of the data and its collection: \nThe experiment decided upon was to see if by using two different designs of paper aeroplane, how far the plane would travel. In considering this, the question arose, whether different types of paper and different angles of release would have any effect on the distance travelled. Knowing that paper aeroplanes are greatly influenced by wind, we had to find a way to eliminate this factor. We decided to perform the experiment in a hallway of the University, where the effects of wind can be controlled to some extent by closing doors. \nIn order to make the experimental units as homogeneous as possible we allocated one person to a task, so person 1 folded and threw all planes, person 2 calculated the random order assignment, measured all the distances, checked that the angles of flight were right, and checked that the plane release was the same each time. \nThe factors that we considered each had two levels as follows: \nPaper: A4 size, 80gms and 50gms\nDesign: High Performance Dual Glider, and Incredibly Simple Glider (patterns attached to original report)\nAngle of release: Horizontal, or 45 degrees upward. \nThe random order assignment was calculated using the random number function of a calculator. Each combination of factors was assigned a number from one to eight, the random numbers were generated and accordingly the order of the experiment was found. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in mm\n\nPaper\n\n80gms = 1, 50gms = 2\n\nAngle\n\nHorizontal = 1, 45 degrees = 2\n\nDesign\n\nHigh-performance = 1, Incredibly simple = 2\n\nOrder\n\nOrder in which the runs were conducted\n", - "download": "http://www.statsci.org/data/oz/planes.txt", - "filename": "planes", - "name": "Paper Plane Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "\"Discovery Day\" is a day set aside by the United States Naval Postgraduate School in Monterey, California, to invite the general public into its laboratories. On Discovery Day, 21 October 1995, data on reaction time and hand-eye coordination were collected on 118 members of the public who visited the Human Systems Integration Laboratory. The age and sex of each subject were also recorded. Visitors were mostly in family groups. \nOne experiment which demonstrates motor learning and hand-eye coordination, is rotary pursuit tracking. The equipment used has a rotating disk with a 3/4\" target spot. The subject’s task is to maintain contact with the target spot with a metal wand. Trials were conducted for 15 seconds at a time, and the total contact time during the 15 seconds was recorded. Four trials were recorded for each of 108 subjects. \nThe target spot on the Circle tracker keeps constant speed in a circular path. The target spot on the Box tracker has varying speeds as it traverses the box, making the task potentially more difficult. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSex\n\nMale (M) or female (F)\n\nAge\n\nAge of subject in years\n\nShape\n\nBox or Circle\n\nTrial1\n\nContact time for 1st trial\n\nTrial2\n\nContact time for 2nd trial\n\nTrial3\n\nContact time for 3rd trial\n\nTrial4\n\nContact time for 4th trial\n\n\n\n", - "download": "http://www.statsci.org/data/general/tracking.txt", - "filename": "tracking", - "name": "Rotary Pursuit Tracking", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Experiment conducted by Bill Afantenou, second year statistics student at QUT. Here is his description of the experiment: \n``As I am a big pizza lover, I had much pleasure in involving pizza in my experiment. I became curious to find out the time it took for a pizza to be delivered to the front door of my house. I was interested to see how, by varying whether I ordered thick or thin crust, whether Coke was ordered with the pizza and whether garlic bread was ordered with the pizza, the response would be affected. \n``Because of my current financial status and limitation of time, I decided to have only two replicates, just to get a reasonable estimate of the variance. To decrease my financial burden I managed a deal with the manager of the pizza shop. I managed to get the pickup special, delivered to my house, which was the cheapest and smallest pizza made. I tried to repeat the experiment in as nearly as possible identical conditions to reduce `noise'. \n``I ordered the pizza from the same shop, being Domino's Pizza. To be consistent I ordered a Supreme pizza each time at approximately the same time of day. The response was measured from the time I closed the telephone to the time the pizza was delivered to the front door of my house. 
\n``I wrote each of the eight treatments on a piece of paper twice, put them all into a hat, mixed them up, and took them out one at a time to allocate the order in which each treatment was done. \n``As well as the response and treatment for each pizza delivery the actual hour of delivery was recorded, also the order in which the treatments were done and whether the driver was male or female.'' \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nCrust\n\nThin=0, Thick=1\n\nCoke\n\nNo=0, Yes=1\n\nBread\n\nGarlic bread. No=0, Yes=1\n\nDriver\n\nMale=M, Female=F\n\nHour\n\nTime of order in hours since midnight\n\nDelivery\n\nDelivery time in minutes\n", - "download": "http://www.statsci.org/data/oz/pizza.txt", - "filename": "pizza", - "name": "Pizza Delivery Experiment", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "An experiment is conducted to compare the energy requirements of three physical activities: running, walking and bicycle riding. Eight subjects are asked to run, walk and bicycle a measured distance, and the number of kilocalories expended per kilometre is determined for each subject during each activity. The activities are run in random order with time for recovery between activities. Each activity was monitored exactly once for each individual. 
", - "download": "http://www.statsci.org/data/general/energy.txt", - "filename": "energy", - "name": "Energy Requirements Running, Walking and Cycling", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data set comprises the results of a saturated 215-11 fractional factorial with 4 observations per run. There were 15 controllable factors. The responses are the proportional shrinkage of four samples taken from 3000-foot lengths of speedometer cable manufactured at each set of conditions. The objective was to reduce the post-extrusion shrinkage of the speedometer casing. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nA\n\nline OD\n\nB\n\nliner die\n\nC\n\nliner material\n\nD\n\nliner line speed\n\nE\n\nwire braid type\n\nF\n\nbraiding tension\n\nG\n\nwire diameter\n\nH\n\nliner tension\n\nI\n\nliner temperature\n\nJ\n\ncosting material\n\nK\n\ncoating die type\n\nL\n\nmelt temperature\n\nM\n\nscreen pack\n\nN\n\ncooling method\n\nO\n\nline speed\n\ny1\n\nshrinkage value of first sample\n\ny2\n\nshrinkage value of second sample\n\ny3\n\nshrinkage value of third sample\n\ny4\n\nshrinkage value of fourth sample\n", - "download": "http://www.statsci.org/data/general/speedome.txt", - "filename": "speedome", - "name": "Speedometer-Cable Shrinkage", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data are from a Proctor and Gamble study reported by Smith and Dubey (1964) on the amount of available chlorine in a product as a function of time since manufacture. 
Theoretical considerations lead to the model \nChlorine = a + (0.49 - a) exp{ -b (Weeks - 8) } \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nWeeks\n\nTime in weeks since manufacture\n\nChlorine\n\nAvailable chlorine\n", - "download": "http://www.statsci.org/data/general/chlorine.txt", - "filename": "chlorine", - "name": "Available Chlorine", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the normalized magnitudes of the voice data when the vowel 'ooh' was sung at a pitch of 290 Hz. A Kurzweil K2500 Sampler/Synthesizer was used to capture and to store the data. \nThe frequencies found in the signal can be used to identify the phonetical vowel, and are of interest in voice synthesis, therapy and training. Further details are given in Oliver (1997). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMagnitude\n\nNormalized Magnitudes at equi-spaced time intervals\n\n\n\n", - "download": "http://www.statsci.org/data/general/ooh.txt", - "filename": "ooh", - "name": "Voice Data from Singing the Vowel 'ooh'", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In studies aimed at characterising an author's style, samples of n words are taken and the number of function words in each sample counted. Often binomial or Poisson distributions are assumed to hold for the proportions of function words. 
The table shows the combined frequencies (x) of the articles \"the\", \"a\" and \"an\" in samples from Macauley's \"Essay on Milton\", taken from the Oxford edition of Macualey's (1923) literary essays. Non-overlapping samples were drawn from opening words of two randomly chosen lines from each of 50 pages of printed text, 10 word samples being simply extensions of 5 word samples. The data show clear evidence of underdispersion.", - "download": "http://www.statsci.org/data/oz/wdcount.txt", - "filename": "wdcount", - "name": "Underdispersed Word Counts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Progressive Insurance asked customers who had been involved in auto accidents how far they were from home when the accident happened. ", - "download": "https://dasl.datadescription.com/download/data/3039", - "filename": "accidents", - "name": "Accidents", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "At a barbershop music singing competition, choruses are judged on three scales: Music (quality of the arrangement, etc.), Performance, and Singing. 
The scales are supposed to be independent of each other, and each is scored by a different judge, but a friend claims that he can predict a chorus’s singing score from the other two […] ", - "download": "https://dasl.datadescription.com/download/data/3061", - "filename": "Barbershop-music", - "name": "Barbershop music", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 2016 13.27 million people attended a Broadway show, paying an average of more than $100 per ticket. The Broadway League, Inc. (https://www.broadwayleague.com/research/statistics-broadway-nyc/) provides some historical and current data. These variables are available for each year since the 1984-85 season: Season (The initial year of the season, so the 1984-85 season is 1984.) Gross ($M) […] ", - "download": "https://dasl.datadescription.com/download/data/3087", - "filename": "Broadway-shows", - "name": "Broadway shows", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Fast food is often considered unhealthy because much of it is high in both fat and sodium. But are the two related? The data give the fat and sodium contents of several brands of burgers. 
", - "download": "https://dasl.datadescription.com/download/data/3088", - "filename": "Burgers", - "name": "Burgers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset holds facts about candy bars read from their nutrition labels. The data are a good example for multiple regression (e.g. what contributes to the calories of a candy bar?). For such an analysis, the indicator variable for nuts appears to work well. Note that 5 sugar-free candy bars are marked as NA in […] ", - "download": "https://dasl.datadescription.com/download/data/3092", - "filename": "Candy-bars", - "name": "Candy bars", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In 1998, as an advertising campaign, the Nabisco Company announced a “1000 Chips Challenge,” claiming that every 18-ounce bag of their Chips Ahoy! cookies contained at least 1000 chocolate chips. Dedicated statistics students at the Air Force Academy randomly selected bags of cookies and counted the chocolate chips. The data report their counts. 
", - "download": "https://dasl.datadescription.com/download/data/3110", - "filename": "Chips-Ahoy", - "name": "Chips Ahoy!", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The website rcdb.com, the Roller Coaster Database, holds facts about every roller coaster in the world, current or past. (If you know of one that is missing, please let the site master know.) These data are for recently opened coasters, most of which are still in operation.", - "download": "https://dasl.datadescription.com/download/data/3118", - "filename": "Coasters-2015", - "name": "Coasters 2015", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data are drawn from the work of O. M. Latter in 1902 and were used in a fundamental textbook on statistical quality control by L. H. C. Tippett (1902–1985), one of the pioneers in that field. \n", - "download": "https://dasl.datadescription.com/download/data/3149", - "filename": "Cuckoos-and-quality-control", - "name": "Cuckoos and quality control", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data are from a production process that makes 250 units each hour. 
The data were collected over a normal 12-hour shift one day.", - "download": "https://dasl.datadescription.com/download/data/3155", - "filename": "Defect-monitoring", - "name": "Defect monitoring", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data are from a production process that makes 250 units each hour. The data were collected over a normal 12-hour shift one day. ", - "download": "https://dasl.datadescription.com/download/data/3156", - "filename": "Defect-monitoring_", - "name": "Defect monitoring second product", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Some students checked 6 bags of Doritos marked with a net weight of 28.3 grams. They carefully weighed the contents of each bag and recorded the weights in grams.", - "download": "https://dasl.datadescription.com/download/data/3171", - "filename": "Doritos", - "name": "Doritos", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student wants to investigate the effects of real vs.\nsubstitute eggs on his favorite brownie recipe. He enlists the\nhelp of 10 friends and asks them to rank each of 8 batches\non a scale from 1 to 10. Four of the batches were made with\nreal eggs, four with substitute eggs. 
The judges tasted the\nbrownies in random order.", - "download": "https://dasl.datadescription.com/download/data/3185", - "filename": "Eggs", - "name": "Eggs", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Many people fear Friday the 13th as an unlucky day. Researchers looked into this to see whether there were differences in traffic or in admissions to hospitals for road accidents on Friday 13th when compared with the adjacent Friday 6th. ", - "download": "https://dasl.datadescription.com/download/data/3219", - "filename": "Friday-13-Accidents", - "name": "Friday the 13th Accidents", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "As a project for an Introductory Statistics course, students checked 6 bags of Fritos marked with a net weight of 35.4 grams. They carefully weighed the contents of each bag, recording the weights (in grams):", - "download": "https://dasl.datadescription.com/download/data/3222", - "filename": "Fritos", - "name": "Fritos", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The movie Harry Potter and the Sorcerer’s Stone opened as a great success. But every movie sees declining revenue over time. The dataset gives the daily revenues for the movie during its first 17 days. 
", - "download": "https://dasl.datadescription.com/download/data/3256", - "filename": "Harry-Potter-revenue", - "name": "Harry Potter revenue", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Is the Statue of Liberty’s nose too long? Her nose measures 4′6″, but she is a large statue, after all. Her arm is 42 feet long. That means her arm is 42/4.5 = 9.3 times as long as her nose. Is that a reasonable ratio? The data give arm and nose lengths of 18 girls ", - "download": "https://dasl.datadescription.com/download/data/3311", - "filename": "Libertys-nose", - "name": "Libertys nose", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Lottery numbers", - "download": "https://dasl.datadescription.com/download/data/3318", - "filename": "Lottery-numbers", - "name": "Lottery numbers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Loyalty program", - "download": "https://dasl.datadescription.com/download/data/3319", - "filename": "Loyalty-program", - "name": "Loyalty program", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - 
"description": "Movie lengths 2010", - "download": "https://dasl.datadescription.com/download/data/3348", - "filename": "Movie-lengths-2010", - "name": "Movie lengths 2010", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Students in an introductory statistics course were asked how many songs they had in their digital music library.", - "download": "https://dasl.datadescription.com/download/data/3352", - "filename": "Music-library", - "name": "Music library", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "New York State inspectors assess all bridges in the state every two years including a bridge’s individual parts. Bridges are analyzed for their capacity to carry vehicular loads. Inspectors are required to evaluate, assign a condition score, and document the condition of up to 47 structural elements, including rating 25 components of each span of a bridge, in addition to general components common to all bridges. The NYSDOT condition rating scale ranges from 1 to 7, with 7 being in new condition and a rating of 5 or greater considered as good conditionBridges that cannot safely carry heavy vehicles, such as some tractor trailers, are posted with weight limits. Based upon inspection and load capacity analysis, any bridge deemed unsafe gets closed.\nHow does the condition of the bridge relate to its age? Are there any outliers? 
Can you account for them by identifying them?", - "download": "https://dasl.datadescription.com/download/data/3364", - "filename": "New-York-bridges-2016", - "name": "New York bridges 2016", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The GfK Roper Reports® Worldwide Survey asked 30,000 consumers in 23 countries about their attitudes on health, beauty, and other personal values. One question participants were asked was how important their personal appearance is to them. The data are a contingency table of responses to this question by age decade. ", - "download": "https://dasl.datadescription.com/download/data/3392", - "filename": "Personal-appearance", - "name": "Personal appearance", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "BYU Human Performance Research Center http://www.byu.edu/chhp/intro.html#lrc Director: Mark Ricard 116A RB, (801) 378-8958", - "download": "https://dasl.datadescription.com/download/data/3445", - "filename": "Shirt-sizes", - "name": "Shirt sizes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A last is a form, traditionally made of wood, in the\nshape of the human foot. Lasts of various sizes are used by\nshoemakers to make shoes. In the United States, shoe sizes are\ndefined differently for men and women:\nU.S. 
men’s shoe size = (last size in inches * 3) – 24\nU.S. women’s shoe size = (last size in inches * 3) – 22.5\nBut in Europe, they are both: Euro size = last size in cm * 3/2\nThe data give the European shoe sizes of 269 college\nstudents (converted from their reported U.S. shoe sizes.)", - "download": "https://dasl.datadescription.com/download/data/3447", - "filename": "Shoe-Sizes", - "name": "Shoe Sizes", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset gives the heights (in inches) of 130 members of a choir and the part they sing. Note that Sopranos and Altos are typically women and Tenors and Basses are typically men. ", - "download": "https://dasl.datadescription.com/download/data/3449", - "filename": "Singers-by-parts", - "name": "Singers by parts", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Sugar is a major ingredient in many breakfast cereals. The data gives the sugar content as a percentage of weight for 49 brands of cereal. Data were collected from nutrition labels in a supermarket. 
", - "download": "https://dasl.datadescription.com/download/data/3467", - "filename": "Sugar-cereal", - "name": "Sugar in cereal", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give counts of 626 individuals categorized according to their “tattoo status” and their “hepatitis status.” Is there a relationship? ", - "download": "https://dasl.datadescription.com/download/data/3476", - "filename": "Tattoos", - "name": "Tattoos", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A bank is studying the time that it takes 6 of its tellers to serve an average customer. Customers line up in the queue and then go to the next available teller. Is there a difference? Can we pick out the best or worst performing teller? ", - "download": "https://dasl.datadescription.com/download/data/3478", - "filename": "Tellers", - "name": "Tellers", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Since 1994, the Best Roller Coaster Poll (www. ushsho.com/bestrollercoasterpoll.htm) has been ranking the world’s best roller coasters. In 2013, Bizarro dropped to 4th after earning the top steel coaster rank for six straight years. Data on the top 14 steel coasters from this poll are given. 
", - "download": "https://dasl.datadescription.com/download/data/3481", - "filename": "Thrills-2013", - "name": "Thrills 2013", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The Minnesota Department of Transportation\nhoped that they could measure the weights of big trucks without\nactually stopping the vehicles by using a newly developed\n“weight-in-motion” scale. To see if the new device was accurate,\nthey conducted a calibration test. They weighed several stopped\ntrucks (Static Weight) and assumed that this weight was correct.\nThen they weighed the trucks again while they were moving to\nsee how well the new scale could estimate the actual weight.", - "download": "https://dasl.datadescription.com/download/data/3512", - "filename": "Vehicle-weights", - "name": "Vehicle weights", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Washing", - "download": "https://dasl.datadescription.com/download/data/3515", - "filename": "Washing", - "name": "Washing", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Consumer Reports tested 11 brands of vanilla yogurt and found these numbers of calories per serving. 
", - "download": "https://dasl.datadescription.com/download/data/3544", - "filename": "Yogurt_", - "name": "Yogurt", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Yogurt flavors", - "download": "https://dasl.datadescription.com/download/data/3545", - "filename": "Yogurt-flavors", - "name": "Yogurt flavors", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "http://jse.amstat.org/datasets/aptness.txt", - "download": "http://jse.amstat.org/datasets/aptness.dat.txt", - "filename": "Aptness", - "name": "Evaluating Aptness of a Regression Model", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In many statistical models the normal distribution of the response is an essential assumption.\nThis paper uses a dataset of 2000 euro coins with information (up to the milligram) about\nthe weight of each coin. As the physical coin production process is subject to a multitude\nof (very small) variability sources, it seems reasonable to expect that the empirical\ndistribution of the weight of euro coins does agree with the normal distribution. Goodness\nof fit tests however show that this is not the case. Moreover, some outliers complicate\nthe analysis. 
Mixtures of normal distributions and skew normal distributions are fitted\nto the data, revealing that the normality assumption might not hold for those weights.", - "download": "http://jse.amstat.org/datasets/euroweight.dat.txt", - "filename": "euroweight", - "name": "The Weight of Euro Coins ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Title, year of release, length in minutes, number of cast members listed, rating, and number of lines \nof description are recorded for a simple random sample of 100 movies. One can use the sample to obtain base-line information \non the movie guide from which the data were collected. The dataset also illustrates two paradoxes for associations between \nthree variables: non-transitivity of positive correlation and Simpson's paradox. SOURCE: The data were taken as a simple \nrandom sample of the approximately 19,000 movies (not including made-for-TV movies) in Leonard Maltin's Movie and Video \nGuide, 1996. ", - "download": "http://jse.amstat.org/datasets/films.dat.txt", - "filename": "films", - "name": "films dataset", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset contains descriptive data of contestants on the game shoe \"Friend or Foe?\". Information on the contestant's \nrace, sex, age, prize money, and playing strategy are included. 
", - "download": "http://jse.amstat.org/datasets/friend_or_foe.dat.txt", - "filename": "friend_or_foe", - "name": "Data from the Television Game Show \"Friend or Foe?\"", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset contains hat size as well as circumference, length of major axis \nand length of minor axis of the inner hat band for 26 hats. The manufacturer \nand the country of manufacture are also included.", - "download": "http://jse.amstat.org/datasets/hats.dat.txt", - "filename": "hats", - "name": " Hat measurements, including hat size", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The dataset consists of samples of size six taken without replacement\nfrom the integers {1, 2, 3, ..., 42}. There are actually three\ndatasets from three different sources, and in each case the six-tuples\nare (in theory) random selections or samples. 
The observations in each\nsample are given in the order in which they were obtained or selected.", - "download": "http://jse.amstat.org/datasets/lotto.dat.txt", - "filename": "lotto", - "name": "Lotto 6/42 Selections from Individuals, Irish National Lottery, and S-Plus Simulation", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This file contains daily per theater box office receipts for 49 \nmovies. This data is to accompany the article entitled Movie Data.", - "download": "http://jse.amstat.org/datasets/moviedaily.dat.txt", - "filename": "moviedaily", - "name": "moviedaily", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Every year actors and actresses are chosen to receive the Oscars awards for best actor and for best actress. 
This dataset \ncontains information about each of the winners for each of the 77 annual Oscar awards.\n\nAlthough there have been only 77 Oscars, there are 78 male winners and 78 female winners because ties happened on two \noccasions (1933 for the best actor and 1969 for the best actress).\n\nVARIABLE DESCRIPTIONS:\n\nColumns Variables\n 1 Gender (m=male f=female)\n 3-4 Oscar Year Number (1-77)\n 6-9 Year the Oscar Took Place\n 11-29 Winner’s first and last name\n 31-60 Name of the Movie in which the winner acted\n 62-63 Age of winner (at the beginning of the winning year)\n 65-77 Birth place (State if born in USA, else Country)\n 79-80 Month in which the winner was born\n 82-83 Day of month on which winner was born\n 85-88 Year the winner was born", - "download": "http://jse.amstat.org/datasets/oscars.dat.txt", - "filename": "oscars", - "name": "Oscars: Best Actors and Actresses", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset contains information collected from rolling the pair of\npigs (found in the game \"Pass the Pigs\") 6000 times. A description of\nthe rules, scoring configurations, and data collection method are\nincluded in the accompanying paper.", - "download": "http://jse.amstat.org/datasets/pig.dat.txt", - "filename": "pig", - "name": "Data from the game \"Pass the Pigs\"", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "In a residential home, energy consumption is closely related to the\noutdoor temperature and size of the house. 
In a home of a given size,\ntemperature fluctuations and energy consumption vary fairly predictably\nover time. When homeowners add a room, other things being equal,\nutility usage should increase. This dataset permits students to\nestimate the energy demand and make forecasts for future months, as\nwell as explore other relationships.\n\nThe dataset contains natural gas and electricity usage data for a\ngas-heated single-family residence in the Boston area from September\n1990 through May 1997, accompanied by monthly climatological data. \nThe dataset is useful for illustrating the concepts and techniques of\ncentral tendency, dispersion, elementary time series analysis,\ncorrelation, simple and multiple regression, and variable\ntransformations.", - "download": "http://jse.amstat.org/datasets/utility.dat.txt", - "filename": "utility", - "name": "What Does It Take to Heat a New Room? ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Other" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the number of deaths in prison custody in Australia in each of the six years 1990 to 1995, given separately for Aboriginal and Torres Strait Islanders (indigenous) and others (non-indigenous). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1990 through 1995\n\nIndigenous\n\nYes = Aboriginal or Torres Strait Islander, No = Non-indigenous\n\nPrisoners\n\nTotal number in prison custody\n\nDeaths\n\nNumber of deaths in prison custody\n\nPopulation\n\nAdult population (15+ years)\n\n\n\n\nThe data were collected in response to the Royal Commission into Aboriginal Deaths in Custody, the final report of which was tabled in the Federal Parliament on the 9 May 1991. \nThe report of the Royal Commission has two streams. 
One is concerned with the ninety-nine Aboriginal and Torres Strait Islander deaths in custody which occurred throughout Australia during the period 1 January 1980 to 31 May 1989. Issues around the causes of death, culpability of custodians and their employers, and the prevention of future deaths were addressed in depth. The second stream concerned what the Royal Commission called the ‘underlying issues’: the social, cultural, and legal factors which, in the view of the Commissioners, had some bearing on the deaths. These underlying issues, as revealed from the chapter headings of the Royal Commission’s National Report, included the Legacy of History, Aboriginal Society Today, Relations With the Non-Aboriginal Community, The Harmful Use of Alcohol and Other Drugs, Schooling, Employment, Unemployment and Poverty, Housing and Infrastructure, Land Needs, and Self-determination. \nThe link between the Royal Commission’s discussion of the individual deaths investigated, the prevention of future deaths and the underlying issues, is its position on the over-representation of Indigenous people in custody in Australia. A central conclusion of the Royal Commission, illustrating this point, was as follows: \nThe work of the commission has established that Aboriginal people in custody do not die at a greater rate than non-Aboriginal people in custody. \nHowever, what is overwhelming different is the rate at which Aboriginal people come into custody, compared with the rate of the general community ... The ninety-nine who died in custody illustrate that over-representation and, in a sense, are the victims of it. \nThe conclusions are clear. Aboriginal people die in custody at a rate relevant to their proportion of the whole population which is totally unacceptable and which would not be tolerated if it occurred in the non-Aboriginal community. 
But this occurs not because Aboriginal people in custody are more likely to die than others in custody, but because the Aboriginal population is grossly over-represented in custody. Too many Aboriginal people are in custody too often (Johnston, 1991, Vol 1, p6).", - "download": "http://www.statsci.org/data/oz/custody.txt", - "filename": "custody", - "name": "Aboriginal Deaths in Custody", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Facts on the countries of Asia. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCountry \n\nName\n\nArea \n\nTotal area (sq km)\n\nPopulation \n\nPopulation July 1995 est.\n\nLife \n\nLife Expectancy 1995 est. (years)\n\nGDP \n\nGDP 1994 (US$ billions)\n\nGDP/caput \n\nGDP per person 1994 est (US$)\n\n\n\n", - "download": "http://www.statsci.org/data/oz/asia.txt", - "filename": "asia", - "name": "Countries of Asia", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The United States Census Bureau keeps track of the number of adoptions in each State (and Washington D.C.). The data includes the population of each state as well. How should adoptions be summarized and displayed? 
", - "download": "https://dasl.datadescription.com/download/data/3043", - "filename": "Adoptions", - "name": "Adoptions", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3177", - "filename": "Drivers-Licenses-2014", - "name": "Drivers Licenses 2014", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Do flexible work schedules reduce the demand for resources? The Lake County, Illinois, Health Department experimented with a flexible four-day workweek. For a year, the department recorded the mileage driven by 11 field workers on an ordinary five-day workweek. Then it changed to a flexible four-day workweek and recorded mileage for another year. ", - "download": "https://dasl.datadescription.com/download/data/3540", - "filename": "Work-week", - "name": "Work week", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - } - ], - "subcategory_name": "Administration" - }, - { - "datasets": [ - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "The data give the survival status of passengers on the Titanic, together with their names, age, sex and passenger class. \nAbout half of the ages for the 3rd Class passengers are missing, although a good many of these could be filled in from the original source below. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName\n\nRecorded name of passenger\n\nPClass\n\nPassenger class: 1st, 2nd or 3rd\n\nAge\n\nAge in years\n\nSex\n\nmale or female\n\nSurvived\n\n1 = Yes, 0 = No\n\n\n\n", - "download": "http://www.statsci.org/data/general/titanic.txt", - "filename": "titanic_", - "name": "Passengers on the Titanic", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "For each of ten streets with bike lanes, investigators measured the distance between the centre line and a cylist in the bike lane. They used photography to determine the distance between the cyclist and a passing car on those same ten streets, recording all distances in feet. \n", - "download": "http://www.statsci.org/data/general/cyclist.txt", - "filename": "cyclist", - "name": "Distance of Cars from Cyclists", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Hourly carbon monoxide (CO) averages were recorded on summer weekdays at a measurement station in Los Angeles. The station was established by the Environmental Protection Agency as part of a larger study to assess the effectiveness of the catalytic converter. It was located about 25 feet from the San Diego Freeway, which in this particular area is located at 145 degrees north. It was located such that winds from 145 to 325 degress (which in the summer are the prevalent wind directions during the daylight hours) transport the CO emissions from the highway toward the measurement station. 
Aggregate measurements were recored for each hour of the day 1 to 24. \nHour \n- \nhour of the day, from midnight to midnight \nCO \n- \naverage summer weekday CO concentration (parts per million) \nTD \n- \naverage weekday traffic density (traffic count/traffic speed) \nWS \n- \naverage perpendicular wind-speed component,\nwind speed x cos(wind direction - 235 degrees) \n\nIt would be interesting to have wind speed and direction recorded separately. ", - "download": "http://www.statsci.org/data/general/cofreewy.txt", - "filename": "cofreewy", - "name": "Carbon Monoxide from a Freeway", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This is a highly fractionated two-level factorial design employed as a screening design in an off-line welding experiment performed by the National Railway Corporation of Japan. There were 16 runs and 9 experimental factors. The response variable is the observed tensile strength of the weld, one of several quality characteristics measured. All other variables are at plus and minus levels. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nRods\n\nKind of welding rods\n\nDrying\n\nPeriod of drying\n\nMaterial\n\nWelded material\n\nThickness\n\nThickness\n\nAngle\n\nAngle\n\nOpening\n\nOpening\n\nCurrent\n\nCurrent\n\nMethod\n\nWelding method\n\nPreheating\n\nPreheating\n\nStrength\n\nTensile strength of the weld in kg/mm\n", - "download": "http://www.statsci.org/data/general/welding.txt", - "filename": "welding", - "name": "Tensile Strength of Welds", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Military pilots sometimes black out when their brains are deprived of oxygen due to G-forces during violent maneuvers. Glaister and Miller (1990) produced similar symptoms by exposing volunteers’ lower bodies to negative air pressure, likewise decreasing oxygen to the brain. The data lists the subjects' ages and whether they showed syncopal blackout related signs (pallor, sweating, slow heartbeat, unconsciousness) during an 18 minute period. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nInitials of the subject's name\n\nAge\n\nSubject's age in years\n\nSigns\n\nWhether subject showed blackout-related signs (0=No, 1=Yes)\n", - "download": "http://www.statsci.org/data/general/gforces.txt", - "filename": "gforces", - "name": "G-Induced Loss of Consciousness", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Commercial airlines overbook flights, selling more tickets than they have seats, because a sizeable number of reservation holders don’t show up in time for their flights. 
But sometimes, there are more passengers wishing to board than there are seats. Most airlines try to entice travelers to voluntarily give up their seats in return for free […] ", - "download": "https://dasl.datadescription.com/download/data/3048", - "filename": "Airline-bumping", - "name": "Airline bumping 2017", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Other" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "https://www.albany.edu/sourcebook/csv/t3177.csv adapted from: U.S. Department of Transportation, Federal Aviation Administration, Semiannual Report to Congress on the Effectiveness of the Civil Aviation Security Program, July 1 to \"December 31, 1978, Exhibit 10; July 1 to December 31, 1982, Exhibit 10; July 1 to December 31, 1984, Exhibit 7; July 1 to December 31, 1989, p. 11 (Washington, DC: U.S. Department of Transportation); U.S. Department of Transportation, Federal Aviation Administration, Annual Report to Congress on Civil Aviation Security, January 1, 1993-December 31, 1993, p. 9; January 1, 1995-December 31, 1995, p. 11 (Washington, DC: U.S. Department of Transportation); and data provided by the U.S. Department of Transportation, Federal Aviation Administration and Bureau of Transportation Statistics [Online]. Available: http://www.bts.gov/publications/national_transportation_statistics/ 2003/html/table_02_16.html [May 24, 2004]. Table adapted by SOURCEBOOK staff.", - "download": "https://dasl.datadescription.com/download/data/3049", - "filename": "Airport-screening", - "name": "Airport screening", + "description": "Why do older people often seem not to remember things as well as younger people? Do they not pay attention? Do they just not process the material as thoroughly? 
One theory regarding memory is that verbal material is remembered as a function of the degree to which it was processed when it was initially presented. Eysenck (1974) randomly assigned 50 younger subjects and 50 older (between 55 and 65 years old) to one of five learning groups. The Counting group was asked to read through a list of words and count the number of letters in each word. This involved the lowest level of processing. The Rhyming group was asked to read each word and think of a word that rhymed with it. The Adjective group was asked to give an adjective that could reasonably be used to modify each word in the list. The Imagery group was instructed to form vivid images of each word, and this was assumed to require the deepest level of processing. None of these four groups was told they would later be asked to recall the items. Finally, the Intentional group was asked to memorize the words for later recall. After the subjects had gone through the list of 27 items three times they were asked to write down all the words they could remember. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nYounger or Older\n\nProcess\n\nThe level of processing: Counting, Rhyming, Adjective, Imagery or Intentional\n\nWords\n\nNumber of words recalled\n", + "download": "http://www.statsci.org/data/general/eysenck.txt", + "filename": "eysenck", + "name": "Age and Memory", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Bicycle Helmet Safety Institute website includes a report on the number of bicycle fatalities per year in the United States. The data gives the counts for the years 1994–2015. 
", - "download": "https://dasl.datadescription.com/download/data/3073", - "filename": "Bike-safety-2015", - "name": "Bike safety 2015", + "description": "Nolen-Hoeksema and Morrow (1991) had the good fortune to have measured depression among college students 2 weeks before the Loma Prieta earthquake in California in 1989. Nolen-Hoeksema and Morrow collected repeat data to track the students’ adjustments to the earthquake. Measurements were taken every 3 weeks starting 2 weeks before the earthquake to 10 weeks after. The data were recreated by Howell (1999) based on the Nolen-Hoeksema and Morrow findings. Each row gives the depression scores for one student.\n\n\nVariable\n\nDescription\n\n\n\n\n\nWeek0\n\nDepression scores 2 weeks before the earthquake\n\nWeek3\n\nDepression scores one week after the quake\n\nWeek6\n\nDepression scores 4 weeks after the quake\n\nWeek9\n\nDepression scores 7 weeks after the quake\n\nWeek12\n\nDepression scores 10 weeks after the quake\n", + "download": "http://www.statsci.org/data/general/lomaprie.txt", + "filename": "lomaprie", + "name": "Depression Before and After an Earthquake", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Psychology" + } + ] + }, + { + "category_name": "Nature", + "subcategories": [ + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The dataset is the number of camp sites at each of the public parks in Vermont ", - "download": "https://dasl.datadescription.com/download/data/3091", - "filename": "Camp-sites", - "name": "Camp sites", + "description": "Daily rainfall (in millimetres) was recorded over a 47-year period in Turramurra, Sydney, Australia. For each year, the wettest day was identified (that having the greatest rainfall). 
The data show the rainfall recorded for the 47 annual maxima.", + "download": "http://www.statsci.org/data/oz/sydrain.txt", + "filename": "sydrain", + "name": "Annual Maximums of Daily Rainfall in Sydney", "number_format": 31, "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, + "separator": "auto", + "simplify_whitespaces": true, + "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the number of domestic U.S. flights flown in each year from 2000 to 2016 ", - "download": "https://dasl.datadescription.com/download/data/3209", - "filename": "Flights-2016", - "name": "Flights 2016", + "description": "These data were collected in a cloud-seeding experiment in Tasmania between mid-1964 and January 1971. The rainfalls are period rainfalls in inches. \nSeeded\n - \nS = seeded, U = unseeded\nSeason\n - \nAutumn, Winter, Spring, Summer\nTE\n - \nrainfall in east target area\nTW\n - \nrainfall in west target area\nNC\n - \nrainfall in north control area\nSC\n - \nrainfall in south control area\nNWC\n - \nrainfall in north-west control area\n\n", + "download": "http://www.statsci.org/data/oz/cloudtas.txt", + "filename": "cloudtas", + "name": "Cloud Seeding in Tasmania", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Bureau of Transportation Statistics of the U.S. Department of Transportation publishes information about airline performance. The data report the percentage of flights departing on time each month from January 1994 through June 2016. 
", - "download": "https://dasl.datadescription.com/download/data/3210", - "filename": "Flights-on-time-2016", - "name": "Flights on time 2016", + "description": "The data are monthly averaged atmospheric pressure differences between Easter Island and Darwin, Australia. This difference drives the trade winds in the southern hemisphere. An annual cycle may be expected, and also longer cycles corresponding to the El Nino and to the Southern Oscillations. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPressure\n\nMonthly average atmospheric pressure differences\n", + "download": "http://www.statsci.org/data/oz/enso.txt", + "filename": "enso", + "name": "Pressure Difference between Easter Island and Darwin", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Many people fear Friday the 13th as an unlucky day. Researchers looked into this to see whether there were differences in traffic or in admissions to hospitals for road accidents on Friday 13th when compared with the adjacent Friday 6th. ", - "download": "https://dasl.datadescription.com/download/data/3220", - "filename": "Friday-13-traffic", - "name": "Friday the 13th traffic", + "description": "Daily rainfall for Melbourne, from 1 January 1981 to 31 December 1990. 
Note that this series is 3 observations longer than the temperature series.", + "download": "http://www.statsci.org/data/oz/melbrain.txt", + "filename": "melbrain", + "name": "Melbourne Daily Rainfall", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Many drivers of cars that can run on regular gas actually buy premium in the belief that they will get better gas mileage. To test that belief, we use 10 cars from a company fleet in which all the cars run on regular gas. Each car is filled first with either regular or premium gasoline, decided by a coin toss, and the mileage for that tankful is recorded. Then the mileage is recorded again for the same cars for a tankful of the other kind of gaso-line. We don’t let the drivers know about this experiment.", - "download": "https://dasl.datadescription.com/download/data/3230", - "filename": "Gasoline__", - "name": "Gasoline", + "description": "Daily minimum and maximum temperatures for Melbourne, from 1 January 1981 to 31 December 1990. The two February 29 leap days are excluded, so there are 10 x 365 = 3650 observations.", + "download": "http://www.statsci.org/data/oz/melbtemp.txt", + "filename": "melbtemp", + "name": "Melbourne Temperatures", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Much of the public and private industry in Hawaii depends on tourism. The following time series plot shows the number of domestic visitors to Hawaii by air from the rest of the United States per month from January 2002 through December 2006 before the financial crisis of 2008. 
", - "download": "https://dasl.datadescription.com/download/data/3257", - "filename": "Hawaii-tourism", - "name": "Hawaii tourism", + "description": "Rainfall for each 6-day period for Adelaide from 1839 to 1977 inclusive. December 31 of the previous year is included in the non-leap years to make 15 6-day periods for each year. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1839 - 1977\n\nPeriod\n\n1 - 61 for each year\n\nRainfall\n\nRainfall in \n", + "download": "http://www.statsci.org/data/oz/adelrain.txt", + "filename": "adelrain", + "name": "Adelaide Rainfall", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data report the percentage of flights that were late and the percentage that departed on time for each month from 1995 through early 2016 ", - "download": "https://dasl.datadescription.com/download/data/3309", - "filename": "Late-arrivals-2016", - "name": "Late arrivals 2016", + "description": "Daily 6am and 3pm temperatures for Brisbane for the decade 1977 - 1986. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDay\n\nDay as YearMonthDay\n\nTemp06\n\n6am Temperature in degrees Celsius x 10\n\nTemp15\n\n3pm Temperature in degrees Celsius x 10\n", + "download": "http://www.statsci.org/data/oz/bristemp.txt", + "filename": "bristemp", + "name": "Brisbane Temperatures", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Research and Innovative Technology Administration of the Bureau of Transportation Statistics reports load factors (passenger-miles as a percentage of available seat miles) for commercial airlines for every month from October 2002 through 2017 for both domestic and international flights. ", - "download": "https://dasl.datadescription.com/download/data/3315", - "filename": "Load-factors-2016", - "name": "Load factors 2016", + "description": "The columns in the data set represent the precipitation weighted mean concentrations of ions for the year 1986, for 47 sites in the United Kingdom. \n \nVariable\n \nDescription\n\nSite\n\nSite number \n\nRain\n\nRain (measured in mm) \n\nH\n\nH+ \n\nSO4\n\nSO4-2 \n\nNO3\n\nNO3- \n\nNH4\n\nNH4+ \n\nx\n\nx-coordinate (measured in cm) \n\ny\n\ny-coordinate (cm) \n\nThe measurement of NH4+ for site number 35 was not available and is represented by NA in the data set. The x- and y-coordinates were measured in cm from a map of the UK. 
", + "download": "http://www.statsci.org/data/general/rainuk.txt", + "filename": "rainuk", + "name": "Acid Rain in the UK", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Weather" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Research and Innovative Technology Administration of the Bureau of Transportation Statistics reports load factors (passenger-miles as a percentage of available seat miles) for commercial airlines for every month from October 2002 through 2017 for both domestic and international flights. ", - "download": "https://dasl.datadescription.com/download/data/3316", - "filename": "Load-factors-2017", - "name": "Load factors 2017", + "description": "The ocean swell produces spectacular eruptions of water through a hole in the cliff at Kiama, about 120km south of Sydney, known as the Blowhole. The times at which 65 successive eruptions occurred from 1340 hours on 12 July 1998 were observed using a digital watch. \nJim Irish writes \nAnyone who has visited the Blowhole more than once knows that the rate and volume of eruptions varies. This variation occurs at several timescales. We might expect that part is explained by the tides, so that eruptions are more frequent and spectacular when the tide is very high, and eruptions obviously depend on the presence of a large ocean swell generated by prolonged strong winds over the ocean well offshore from Kiama. Hence, any stochastic model fitted to data observed over a short period of time is only applicable to that period, and perhaps a few hours either side of the observations. But we might infer from the model fitted to those data that a similar model applies more generally. 
", + "download": "http://www.statsci.org/data/oz/kiama.txt", + "filename": "kiama", + "name": "Kiama Blowhole Eruptions", "number_format": 31, "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, + "separator": "auto", + "simplify_whitespaces": true, + "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the number of passengers at Oakland (CA) airport month by month since 1997. ", - "download": "https://dasl.datadescription.com/download/data/3371", - "filename": "Oakland-passengers-2016", - "name": "Oakland passengers 2016", + "description": "The data records the length of rivers in the South Island of New Zealand. The lengths are given in kilometres. The second variable, FlowsInto, indicates whether the river flows into the Pacific Ocean (0) or the Tasman Sea (1). A map of the island's rivers is included here.", + "download": "http://www.statsci.org/data/oz/nzrivers.txt", + "filename": "nzrivers", + "name": "Length of New Zealand Rivers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The National Highway Traffic Safety Administration reports seat belt use and fatalities in car accidents by state. How do fatalities relate to seat belt use? ", - "download": "https://dasl.datadescription.com/download/data/3442", - "filename": "Seat-belts-2015", - "name": "Seat belts 2015", + "description": "Date on the concentration of polychlorinated biphenyl (PCB) residues in a series of lake trout from Cayuga Lake, NY, were reported in Bache et al (1972). 
The ages of the fish were accurately known, because the fish were annually stocked as yearlings and distinctly marked as to year class. Each whole fish was mechanically chopped, ground, and thoroughly mixed, and 5-gram samples taken. The samples were treated and PCB residues in parts per million (ppm) were estimated using column chromatography. \nBates and Watts (1988) use a linear model \nlog(PCB) = b1 + b2 Age^(1/3) \nbut they remark that the nonlinear model \nlog(PCB) = b1 + b2 Age^q \nis slightly better. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of trout (years)\n\nPCB\n\nPCB concentration (ppm)\n", + "download": "http://www.statsci.org/data/general/troutpcb.txt", + "filename": "troutpcb", + "name": "PCB Concentrations in Lake Trout", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data report the density (cars per mile) and average speed of traffic on city highways. The data were collected at the same location at 10 different times randomly selected within a span of 3 months. ", - "download": "https://dasl.datadescription.com/download/data/3560", - "filename": "Speed-density", - "name": "Speed and density", + "description": "Jaffe, Parker and Wilson have investigated the concentration of several hydrophobic organic substances (such as hexachlorobenzene, chlordane, heptachlor, aldrin, dieldrin, endrin) in the Wolf River in Tennessee. Measurements were taken downstream of an abandoned dump site that had previously been used by the pesticide industry to dispose of its waste products. 
\nIt was expected that these hydrophobic substances might have a nonhomogeneous vertical distribution in the river because of differences in density between these compounds and water and because of the adsorption of these compounds on sediments, which could lead to higher concentrations on the bottom. It is important to check this hypothesis because the standard procedure of sampling at six-tenths of the depth could miss the bulk of these pollutants if the distribution were not uniform. \nGrab samples were taken with a La Motte-Vandorn water sampler of 1 litre capacity at various depths of the river. This sampler consists of a horizontal plexiglas tube of 7 centimetres diameter and a plunger on each side which shuts the sampler when the sampler is at the desired depth. Ten surface, 10 mid-depth and 10 bottom samples were collected, all within a relatively short period. Until they were analysed the samples were stored in 1-quart mason jars at low temperature. \nIn the analysis of the samples, a 250-millilitre water sample was taken from each mason jar and was extracted with 1 millilitre of either hexanes or petroleum ether. A sample of the extract was then injected into a gas chromatograph and the output was compared against standards of known concentrations. The test procedure was repeated two more times, injecting different samples of the extract in the gas chromatograph. 
The average aldrin and hexachlorobenzene (HCB) concentrations (in nanograms per liter) in these 30 samples are given in the data.", + "download": "http://www.statsci.org/data/general/wolfrive.txt", + "filename": "wolfrive", + "name": "Wolf River Pollution", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A tire manufacturer tested the braking performance of one of its tire models on a test track. The company tried the tires on 10 different cars, recording the stopping distance for each car on both wet and dry pavement. ", - "download": "https://dasl.datadescription.com/download/data/3460", - "filename": "Stopping-distance", - "name": "Stopping distance", + "description": "The following data from the Statistical Abstract of the United States give the number of accidental oil spills at sea and the amount of oil lost in these spills for the years 1973 - 1985. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nYear\n\nYear\n\nSpills\n\nNumber of spills\n\nOil\n\nAmount of oil lost (thousands of metric tonnes)\n\n\n\n", + "download": "http://www.statsci.org/data/general/spills.txt", + "filename": "spills", + "name": "Accidental Oil Spills", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A tire manufacturer tested the braking performance of one of its tire models on a test track. The company tried the tires on 10 different cars, recording the stopping distance for each car on both wet and dry pavement from 60 miles per hour. The test was run on both dry and wet pavement. 
(The actual braking distance takes into account the driver’s reaction time, which typically adds nearly 300 feet at 60 mph!)", - "download": "https://dasl.datadescription.com/download/data/3461", - "filename": "Stopping-distance-60", - "name": "Stopping distance 60", + "description": "These data refer to a survey of the fauna on the sea bed lying between the coast of northern Queensland and the Great Barrier Reef. The sampling region covered a zone which was closed to commercial fishing, as well as neighbouring zones where fishing was permitted. In view of the large numbers and types of species captured in the survey the catch was summarized as a score, on a log weight scale, which combines information across species. Two such scores are available. The details of the survey, and a full analysis of the data, are in Poiner et al (1997). \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nZone\n\nan indicator for the closed (1) and open (0) zones \n\nYear\n\nan indicator of 1992 (0) or 1993 (1) \n\nLatitude\n\nlatitude of the sampling position \n\nLongitude\n\nlongitude of the sampling position \n\nDepth\n\nbottom depth \n\nScore1\n\ncatch score 1 \n\nScore2\n\ncatch score 2 \n", + "download": "http://www.statsci.org/data/oz/reef.txt", + "filename": "reef", + "name": "Prawn Trawling in the Great Barrier Reef", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Waters" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Traffic fatalities in a variety of vehicles and for a variety of situations for the years from 1975 to 2013. These are multiple time series, but can also be related to each other. 
", - "download": "https://dasl.datadescription.com/download/data/3495", - "filename": "Traffic-fatalities", - "name": "Traffic fatalities 2013", + "description": "Groups of dolphins were observed off the coast of Iceland near Keflavik in 1998. The data here give the time of the day and the main activity of the group, whether travelling quickly, feeding or socializing. The dolphin groups varied in size - usually feeding or socializing groups were larger than travelling groups. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nActivity\n\nMain activity of group: travelling (Travel), feeding (Feed) or socializing (Social)\n\nPeriod\n\nTime of the day: Morning, Noon, Afternoon or Evening\n\nGroups\n\nNumber of groups observed\n\n\n\n", + "download": "http://www.statsci.org/data/general/dolpacti.txt", + "filename": "dolpacti", + "name": "Activities of Dolphin Groups", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The U.S. Energy Information Administration (EIA) collects data on the total energy used per capita in transportation for each state and the District of Columbia. The data show the per capita consumption in the year 2015 in millions of BTU per person. ", - "download": "https://dasl.datadescription.com/download/data/3496", - "filename": "Transportation-Energy", - "name": "Transportation Energy use", + "description": "Cairns (1988) analysed the relation between population and foraging area for seabird colonies. The following table presents their data for 22 black-legged kittiwake (a northern gull) colonies of Scotland's Shetland and Orkney Islands. Area is km2 and Population is the number of breeding pairs. 
", + "download": "http://www.statsci.org/data/general/kittiwak.txt", + "filename": "kittiwak", + "name": "Kittiwake Colonies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "U.S. Department of Transportation reports records of border crossings into each state on the U.S. border. Here are the border crossings by trucks for Alaska, recorded each month from 1999 through 2017. ", - "download": "https://dasl.datadescription.com/download/data/3499", - "filename": "Trucks_", - "name": "Trucks", + "description": "Some handicapped people have access to trained monkey helpers that can perform household tasks like switching things on and off. This data set gives the number of tasks each of nine monkeys can perform along with the number of years the monkeys have been working with handicapped people. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName \n\nName of monkey\n\nYears \n\nNumber of years the monkey has worked with handicapped people\n\nTasks \n\nNumber of tasks the monkey can perform\n", + "download": "http://www.statsci.org/data/general/monkeys.txt", + "filename": "monkeys", + "name": "Trained Monkeys", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "http://jse.amstat.org/datasets/airport.txt", - "download": "http://jse.amstat.org/datasets/airport.dat.txt", - "filename": "US-Airport-Statistics", - "name": " US Airport Statistics", + "description": "Includes brain and body weight, life span, gestation time, time sleeping, and predation and danger indices for 62 species of mammals. 
Of interest is to predict the time spent sleeping and the proportion of sleep time in dream sleep. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nBodyWt\n\nbody weight (kg)\n\nBrainWt\n\nbrain weight (g)\n\nNonDreaming\n\nslow wave (\"nondreaming\") sleep (hrs/day)\n\nDreaming\n\nparadoxical (\"dreaming\") sleep (hrs/day)\n\nTotalSleep\n\ntotal sleep, sum of slow wave and paradoxical sleep (hrs/day)\n\nLifeSpan\n\nmaximum life span (years)\n\nGestation\n\ngestation time (days)\n\nPredation\n\npredation index (1-5)\n1 = minimum (least likely to be preyed upon); 5 = maximum (most likely to be preyed upon)\n\nExposure\n\nsleep exposure index (1-5)\n1 = least exposed (e.g. animal sleeps in a well-protected den); 5 = most exposed\n\nDanger\n\noverall danger index (1-5) (based on the above two indices and other information)\n1 = least danger (from other animals); 5 = most danger (from other animals)\n\n\n\n", + "download": "http://www.statsci.org/data/general/sleep.txt", + "filename": "sleep_", + "name": "Sleep in Mammals", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data is a set of 50000 (1.3 MB ) observations containing roughly 2 minutes of traffic from the one hour, larger \ndec-pkt-1.tcp file used in the paper. The larger file can be accessed from the author's web page or from its source. With \nonly 50000 observations, the data set ", - "download": "http://jse.amstat.org/datasets/packetdata.dat.txt", - "filename": "packetdata", - "name": "packetdata", + "description": "Results of horse races at Eagle Farm, Brisbane, on 31 August 1998. The data, collected by Donald Forbes for his MS305 Data Analysis Project, give results for each horse in a sequence of 8 races. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPosition\n\nFinishing position\n\nStarters\n\nNumber of horses in race\n\nLast\n\nFinishing position in last race\n\nSince\n\nDays since last race\n\nNumber\n\nIdentifying number of horse in race\n\nCarried\n\nWeight carried\n\nWeight\n\nHandicap weight\n\nBarrier\n\nBarrier position at start of race\n\nDistance\n\nLength of race\n\nLengths\n\nNumber of lengths that horse finished from winner\n\nOdds\n\nStarting odds\n\nStarts\n\nNumber of races previously started in\n\nAge\n\nAge of horse in years\n\nRatio\n\nProportion of wins in previous starts\n", + "download": "http://www.statsci.org/data/oz/horses.txt", + "filename": "horses", + "name": "Horse Racing at Eagle Farm", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This dataset consists of a listing of all US interstate\nhighways, treating the highway as the sampling unit.\n\nVARIABLE DESCRIPTIONS:\nInterstate # Columns 1-2\nNumber of states Columns 4-5\nApproximate miles Columns 7-10\nSouthern or Western end Columns 12-34\nNorthern or Eastern end Columns 36-57", - "download": "http://jse.amstat.org/datasets/ushighway1.dat.txt", - "filename": "ushighway1", - "name": " US INTERSTATE SYSTEM I", + "description": "The data give the survival times (in 10 hour units) in a 3 x 4 factorial experiment, the factors being (a) three poisons and (b) four treatments. Each combination of the two factors is used for four animals, the allocation to animals being completely randomized. 
\n", + "download": "http://www.statsci.org/data/general/poison.txt", + "filename": "poison", + "name": "Poison Experiment", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This dataset consists of a listing of all US interstate\nhighways, treating the highway/state combination \nas the sampling unit. Three principal cities through which the\nhighway runs are given for each state. Therefore, a single highway \nmay have several observations, one observation for each state.", - "download": "http://jse.amstat.org/datasets/ushighway2.dat.txt", - "filename": "ushighway2", - "name": "US INTERSTATE SYSTEM II", + "description": "The data was collected by Peter Drew and Matt Seidemann, statistics students at the Queensland University of Technology, in a subject taught by Dr Margaret Mackisack. Here is their description of the data and its collection: \n\"As keen fishermen out and about on a fairly regular basis, the common arguments arise between anglers on the best rigging set up for various conditions. We decided that upon our next group outing that we would back up our opinions with hard statistical facts. Our interest led us to test the most obvious variables in the fishing rig. \n\"Of interest were firstly the rod length, as between fisherman there always tends to be a variety of rods of different sizes; secondly the type of line, in that the larger the line it would be logical that the weight would increase; thirdly the sinker weight and how it affected the casting distance. \n\"In deciding on the three variables a 2^3 factorial design seemed obvious and for our purposes seemed to be quite adequate. 
So the question was placed as to whether or not the above variables in any combination made any difference to the overall distance the line was cast. The rods used were 6ft and 7ft two piece boat rods, fitted with the same type of spinning reel. The variable sinkers were 8oz and 12oz round ball sinkers and the line used was either the 1kg or 2kg line of the same make. \n\"The experiment was carried out on a day that was close to windless thus lowering the relative influence of the wind. The series of casts was conducted by the same person as were the measurements thus giving uniformity to the total experiment. A break of five minutes was timed between casts so as to allow the caster to allocate the same amount of energy to each cast. The rods were not rigged by the caster; a rigger would set the rod up with a combination of sinker, line and rod, and an effort was made to keep the caster oblivious to the changes in the rig. \n\"The experiment was conducted on the rugby ovals on Oleria St, Brookside (a western suburb of Brisbane) adjacent to the RSL (Returned Serviceman League club), which for all intents and purposes would be classified as a level surface. A line was placed at one end of the field and from it the caster would cast the rod as he would given normal fishing conditions. A spotter who was also the measurer would mark the point of impact of the sinker and from it measure back to the line from which it was cast. The distance observed was subsequently rounded up to the nearest 0.5 of a metre. Two runs were made of each combination. \n\"Possible improvements: Because of the time the rigging took, both casts with each rig were done at the same time. 
If we did it again it would be better to use random numbers to decide the order of all sixteen casts.\" ", + "download": "http://www.statsci.org/data/oz/fishing.txt", + "filename": "fishing_", + "name": "Fishing Rod Experiment", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This dataset consists of a listing of all US 3-digit interstate\nhighways (spurs and connecters), treating the highway/state\ncombination as the sampling unit.", - "download": "http://jse.amstat.org/datasets/ushighway3.dat.txt", - "filename": "ushighway3", - "name": "US INTERSTATE SYSTEM III", + "description": "Four male and four female turtles had their plasma protein measured while they were well fed and after ten and twenty days of fasting. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-8\n\nSex\n\nMale or Female\n\nFed\n\nPlasma protein while well fed (mg/ml)\n\nFasted10\n\nPlasma protein after fasting 10 days\n\nFasted20\n\nPlasma protein after fasting 20 days\n", + "download": "http://www.statsci.org/data/general/turtles.txt", + "filename": "turtles", + "name": "Plasma Protein of Fasting Turtles", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Travel" - }, - { - "datasets": [ + "use_first_row_for_vectorname": true + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the number of deaths cuased by firearms in Australia from 1983 to 1997, expressed as a rate per 100,000 of population. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\nYear\n\nRate \n\nNumber of deaths caused by firearms per 100,000 population\n", - "download": "http://www.statsci.org/data/oz/firearms.txt", - "filename": "firearms", - "name": "Deaths Caused by Firearms", + "description": "Frogs of four species had their oxygen consumption measured at two temperatures and two exercise levels. There were two frogs of each species at each temperature, and each of the two was measured both at rest and during forced exercise. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\n1-16\n\nSpecies\n\n1-4\n\nTemperature\n\nLow or High\n\nRest\n\nOxygen consumption (ml O2/g/hr) at rest\n\nExercise\n\nOxygen consumption during exercise\n\n\n\n\n", + "download": "http://www.statsci.org/data/general/frogs.txt", + "filename": "frogs_", + "name": "Oxygen Consumption of Frogs", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Criminologists are interested in the effect of punishment regimes on crime rates. This has been studied using aggregate data on 47 states of the USA for 1960. 
The data set contains the following columns: \n \nVariable\n \nDescription\n\nM\n\npercentage of males aged 14–24 in total state population\n\nSo\n\nindicator variable for a southern state \n\nEd\n\nmean years of schooling of the population aged 25 years or over\n\nPo1\n\nper capita expenditure on police protection in 1960 \n\nPo2\n\nper capita expenditure on police protection in 1959 \n\nLF\n\nlabour force participation rate of civilian urban males in the age-group 14-24\n\nM.F\n\nnumber of males per 100 females \n\nPop\n\nstate population in 1960 in hundred thousands\n\nNW\n\npercentage of nonwhites in the population \n\nU1\n\nunemployment rate of urban males 14–24 \n\nU2\n\nunemployment rate of urban males 35–39 \n\nWealth\n\nwealth: median value of transferable assets or family income\n\nIneq\n\nincome inequality: percentage of families earning below half the median income\n\nProb\n\nprobability of imprisonment: ratio of number of commitments to number of offenses\n\nTime\n\naverage time in months served by offenders in state prisons before their first release\n\nCrime\n\ncrime rate: number of offenses per 100,000 population in 1960\n", - "download": "http://www.statsci.org/data/general/uscrime.txt", - "filename": "uscrime", - "name": "Effect of Punishment Regimes on Crime Rates", + "description": "The data give the age and the length of dugongs Dugong dugon (Müller) captured near Townsville in north Queensland, Australia. The lifespan of a dugong is 50-60 years.\nThese data were working estimates. In particular the method of determining the age of dugong has changed somewhat since the data were recorded. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge in years\n\nLength\n\nLength in metres\n\n\n\n", + "download": "http://www.statsci.org/data/oz/dugongs.txt", + "filename": "dugongs", + "name": "Age and Length of Dugongs near Townsville", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A number of homicide incidents in Australia have involved multiple killings. A multiple killing is defined as any incident where two or more persons are murdered. According to available literature, there have been 24 multiple killings by firearm between 1987 and 1996. These resulted in 128 deaths. The data give the number of multiple killings which have been recorded for the period 1987 to 28 April 1996. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\n1987 - 1996.\n\nIncidents \n\nNumber of multiple killings\n\nDeaths \n\nTotal number of deaths\n\n\n\n\nThe data for the year 1996 include killings only up to and including 28 April.", - "download": "http://www.statsci.org/data/oz/multkill.txt", - "filename": "multkill", - "name": "Multiple Killings Committed with a Firearm", + "description": "The data give the sound pressure of sonar signals (\"clicks\") from a dolphin at various ranges to target. The measurements were made off the coast of Iceland near Keflavik in 1998. The pressure measurement given is \nraw pressure + a Range \nwhere a is a known constant depending on the water density. Pressure is expected to increase with distance even after the adjustment. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRange \n\nDistance to dolphin in metres\n\nSoundPressure \n\nWater sound pressure adjusted for water density\n", + "download": "http://www.statsci.org/data/general/dolphin.txt", + "filename": "dolphin", + "name": "Sound Pressure of Dolphin Sonar", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A survey was conducted in the United States and 10 countries of Western Europe to determine the percentage of teenagers who had used marijuana and other drugs. The data give percentages of drug use by country. ", - "download": "https://dasl.datadescription.com/download/data/3178", - "filename": "Drug-abuse", - "name": "Drug abuse", + "description": "The observed responses are Geiger counter counts (times 10-4) used to measure the amount of radioactively tagged sulfate drug in the blood of a baboon named Brunhilda after an injection of the drug. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nTime in hours since injection\n\nSulfate\n\nGeiger counter counts × 10-4\n", + "download": "http://www.statsci.org/data/general/brunhild.txt", + "filename": "brunhild", + "name": "Blood Sulfate in a Baboon Named Brunhilda", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The 2013 World Drug Report investigated the prevalence of drug use as a percentage of the population aged 15 to 64. 
Data from 32 European countries are shown.", - "download": "https://dasl.datadescription.com/download/data/3179", - "filename": "Drug-use-2013", - "name": "Drug use 2013", + "description": "The European rabbit Oryctolagus cuniculus is a major pest in Australia. A reliable method of age determination for rabbits caught in the wild would be of importance in ecological studies. In this study, the dry weight of the eye lens was measured for 71 free-living wild rabbits of known age. Eye lens weight tends to vary much less with environmental conditions than does total body weight, and therefore may be a much better indicator of age \nThe rabbits were born and lived free in an experimental 1.7 acre enclosure at Gungahlin, ACT. The birth data and history of each individual were accurately known. Rabbits in the enclosure depended on the natural food supply. In this experiment, 18 of the eye lenses were collected from rabbits that died in the course of the study from various causes such as coccidiosis, bird predation or starvation. The remaining 53 rabbits were deliberately killed, immediately after being caught in the enclosure or after they had been kept for some time in cages. The lenses were preserved and their dry weight determined. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of rabbit in days\n\nLens\n\nDry weight of eye lens in milligrams\n", + "download": "http://www.statsci.org/data/oz/rabbit.txt", + "filename": "rabbit", + "name": "Age and Eye Lens Weight for Rabbits in Australia", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Prisons 2014", - "download": "https://dasl.datadescription.com/download/data/3406", - "filename": "Prisons-2014", - "name": "Prisons 2014", + "description": "Insects were exposed to gaseous carbon disulphide for a period of 5 hours. Eight experiments were run with different concentrations of carbon disulphide. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDose\n\nDose of carbon disulphide\n\nExposed\n\nNumber of beetles exposed\n\nMortality\n\nNumber of beetles killed\n", + "download": "http://www.statsci.org/data/general/beetles.txt", + "filename": "beetles", + "name": "Beetle Mortality", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - } - ], - "subcategory_name": "Crime" - }, - { - "datasets": [ + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "An individual's critical flicker frequency is the highest frequency at which the flicker in a flickering light source can be detected. At frequencies above the critical frequency, the light source appears to be continuous even though it is actually flickering. This investigation recorded critical flicker frequency and iris colour of the eye for 19 subjects. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nColour\n\nEye colour: Brown, Green or Blue\n\nFlicker\n\nCritical flicker frequency in cycles/sec\n", - "download": "http://www.statsci.org/data/general/flicker.txt", - "filename": "flicker", - "name": "Eye Colour and Flicker Frequency", + "description": "Bill Venables writes: \nGroups of 20 snails were held for periods of 1, 2, 3 or 4 weeks in carefully controlled conditions of temperature and relative humidity. There were two species of snail, A and B, and the experiment was designed as a 4 by 3 by 4 by 2 completely randomized design. At the end of the exposure time the snails were tested to see if they had survived; the process itself is fatal for the animals. The object of the exercise was to model the probability of survival in terms of the stimulus variables, and in particular to test for differences between species. The data are unusual in that in most cases fatalities during the experiment were fairly small. \nSpecies\n \nSnail species A or B \nExposure\n \nExposure in weeks (4 levels) \nHumidity\n \nRelative humidity (4 levels) \nTemp\n \nTemperature in degrees Celsius (3 levels) \nDeaths\n \nNumber of deaths \nN \n \nNumber of snails exposed \n", + "download": "http://www.statsci.org/data/oz/snails.txt", + "filename": "snails_", + "name": "Snail Mortality", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data are a random sample from the data in Population commute times.", - "download": "https://dasl.datadescription.com/download/data/3123", - "filename": "Commute-times-sample100", - "name": "Commute times sample100", + "description": "Activity of individually caged fiddler crabs under constant conditions for 225 consecutive hours (225 = 9*25 = 9*24 + 8). 
The activity scale is log(y+1) where y is mean minutes per hour. Examination of the data suggests that the logarithm was base 10. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nActivity\n\nlog(Minutes per hour+1)\n", + "download": "http://www.statsci.org/data/general/fiddler.txt", + "filename": "fiddler", + "name": "Activity of Fiddler Crabs", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3137", - "filename": "Couples", - "name": "Couples", + "description": "Tidal shrimps from the Brisbane River move up and down the tidal area (harbour pylon for example) in accordance with the movement of the tides. In this experiment shrimps were removed from their natural environment and isolated from environmental stimuli which would allow them to measure time. Their vertical position on an inclined slope was recorded every half hour starting 20 hours after removal and continuing for one week. Also recorded is the actual tide height during the same period, and six other measures of the shrimps' activity. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime \n\nHours since isolation\n\nVertical \n\nVertical displacement from original position\n\nY2 - Y7 \n\nOther activity measurements\n\nTide \n\nActual tide height\n", + "download": "http://www.statsci.org/data/oz/shrimp.txt", + "filename": "shrimp_", + "name": "Movement of Tidal Shrimps in Isolation", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Data give the mortality rate (deaths per 100,000 people) and the education level (average number of years in school) for 58 U.S. cities. ", - "download": "https://dasl.datadescription.com/download/data/3183", - "filename": "Education-and-mortality", - "name": "Education and mortality", + "description": "Monthly total number of pigs slaughtered in Victoria, from January 1980 to August 1995.", + "download": "http://www.statsci.org/data/oz/pigs.txt", + "filename": "pigs", + "name": "Pigs Slaughtered in Victoria", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Students in a large statistics class were asked to report the eye color and hair color. Is there an association? ", - "download": "https://dasl.datadescription.com/download/data/3197", - "filename": "Eye-and-Hair-color", - "name": "Eye and Hair color", + "description": "Seed predators and herbivores can operate as strong selective agents in the evolution of plant defence. In this context, Delpino (1886) posed the \"ant-guard\" hypothesis to explain the role of extrafloral nectaries on plants. 
Extrafloral nectaries (EFN), distributed on species in over 80 plant families, occur on vegetative organs and \"outer floral parts\" not directly associated with pollination. Basically, the hypothesis states that extrafloral nectar production attracts pugnacious \"bodyguards\" (usually ants) which by their foraging activities deter the activities of herbivorous insects and seed predators. \nSince its inception, the ant-guard hypothesis has remained controversial. A few careful studies have experimentally demonstrated that ants attending EFN protect plants (von Wettstein, 1889; Inouye and Taylor, 1979; Schemske, 1980) while several recent studies showed no effect (O’Dowd and Catchpole, 1983; Tempel, 1983; Boecklen, 1984). O’Dowd and Catchpole (1983), for example, found that attendance of ants at EFN deterred other insects from developing flowerheads but that their presence decreased neither the numbers of seed predators nor damage to developing flowerheads. The object of this paper is to describe the ant-insect interactions by means of a simple probability model. \nFull experimental detail is provided by O’Dowd and Catchpole (1983) but an outline is as follows. The plants studied were Helichrysum bracteatum. Three sites were chosen in clearings in the Tallaganda State forest, 40 km. southeast of Canberra, and at each site ten pairs of plants were studied. Plants within each pair were of similar initial size and less than 1 metre apart. Within each pair, ants were excluded from one plant, while the other served as a control. The plants were censused once a week for 17 weeks over the reproductive season (from initiation of flowerheads through the postflowering phase). The data recorded for each plant included the number of flowerheads (capitula), the number of capitula with ants, and the total number of other insects. Different species of ants (predominantly Iridomyrmex spp.) 
and other insects were observed, but in the data here are pooled within each general category. \nTo clarify: the first column (Week) lists the week the observation was made, the second (Index) lists the index given to the pair of plants observed, the third (AntCap) is the number of capitula on the plant with ant access, the fourth (ExcCap) is the number of capitula on the plant excluded from ant access, the fifth (Ants) is the number of capitula that have ants present on them, the sixth column (AntIns) is the number of insects on the plant with ant access, and the seventh (ExcIns) is the number of insects on the plant excluded from ant access. Index number 1-10 refer to Site 1, 11-20 to Site 2 and 21-30 to Site 3.", + "download": "http://www.statsci.org/data/oz/ants.txt", + "filename": "ants", + "name": "Ant-Insect Interactions on Flowerheads", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Eurostat, an agency of the European Union (EU), conducts surveys on several aspects of daily life in EU countries. Recently, the agency asked samples of 1000 respondents in each of 14 European countries whether they read the newspaper on a daily basis. ", - "download": "https://dasl.datadescription.com/download/data/3363", - "filename": "Newspapers", - "name": "Newspapers", + "description": "A new type of heart valve has been developed and is implanted in 63 dogs that have been raised on various levels of exercise. The numbers of valve transplants that succeed are recorded. Is the proportion of successful implants the same for dogs on all exercise regimens? Is there a trend with amount of exercise in the proportion of successful implants? 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nExercise\n\nAmount of exercise: 1=None, 2=Slight, 3=Moderate, 4=Vigorous\n\nImplant\n\n1=Successful, 2=Unsuccessful\n\nFrequency\n\nNumber of dogs\n\n\n\n", + "download": "http://www.statsci.org/data/general/exervalv.txt", + "filename": "exervalv", + "name": "Heart Valves in Dogs on Different Exercise Regimens", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Population Commute Times", - "download": "https://dasl.datadescription.com/download/data/3401", - "filename": "Population-Commute", - "name": "Population Commute Times", + "description": "The data give growth measurements on Tammar wallabies (Macropus eugenii). Each line is a set of measurements on an animal at a particular time. Most lengths are in tenths of millimetres. The data from some animals is very fragmentary. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nAnim\n\nAnimal number\n\nSex\n\n1=male, 2=female\n\nLoca\n\nLocation of animal\n\nLeng\n\nLength of animal (tenths of a millimetre)\n\nHead\n\nHead length\n\nEar\n\nEar Length\n\nArm\n\nArm length\n\nLeg\n\nLeg length\n\nPres\n\nPes (foot) length\n\nTail\n\nTail length\n\nWeight\n\nWeight (tenths of a gram)\n\nAge\n\nAge in days from birth\n", + "download": "http://www.statsci.org/data/oz/wallaby.txt", + "filename": "wallaby", + "name": "Dryandra Tammar WallabyGrowth of Tammar Wallabies", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "the percentage change in population for the 50 states and the District of Columbia from the 2000 census to the 2010 census. 
", - "download": "https://dasl.datadescription.com/download/data/3402", - "filename": "Population-growth-2010", - "name": "Population growth 2010", + "description": "Following the Second World War, D. L. Serventy carried out a detailed study of the lifecycle of the Tasmanian muttonbird (Puffinus tenuirostris, often called the short-tailed shearwater). The data here concerns the growth pattern of fledgling birds of this species. \nAfter the eggs hatch, the parent birds spend much time away from the nest, and with increasing time their returns become rarer and rarer. When they return the young bird feeds copiously, and there is very rapid weight-gain; whilst they are absent, the offspring loses weight. The result is not a smooth growth curve such as one finds in most measurements in developing animals and birds, but a 'sawtooth' effect. The data were collected in 1954 as weighings each morning of two fledgling chicks on Fisher Island, Bass Strait, and each set terminates on the day the chick left the nest. \nMuch of the interest in these curves comes not from the description they give of the weight of the chick, but from the information they contain on the feeding patterns of the parents. There are three obvious features of the data; the timing of the feeds and the size of the feeds when they occur, both of which represent aspects of the feeding pattern of the parents; and the loss in weight of the chicks between feeds. 
Henstridge and Tweedie (1984) proposed a model, similar to those used in storage theory, which describes each of these phenomena separately.", + "download": "http://www.statsci.org/data/oz/muttonbi.txt", + "filename": "muttonbi", + "name": "Growth of Tasmanian Muttonbirds", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Crowd Management Strategies monitors accidents at rock concerts. In their database, they list the names and other variables of victims whose deaths were attributed to “crowd crush” at rock concerts. The data give the victims’ ages for data from a one-year period: ", - "download": "https://dasl.datadescription.com/download/data/3429", - "filename": "Rock-concert-deaths", - "name": "Rock concert deaths", + "description": "Dimensions in millimetres are given of two samples of jellyfish from Hawkesbury River in New South Wales, Australia. One of the samples came from Dangar Island and the other from Salamander Bay. The first column contains a \"D\" if the measurement came from Dangar Island and a \"S\" if it came from Salamander Bay. The dimensions measured were length and width. What can one learn from graphing the two principal components? Try graphing principal components of the logarithms of the measurements. 
Can the dimensions determine the location?", + "download": "http://www.statsci.org/data/oz/jellfish.txt", + "filename": "jellfish", + "name": "Dimensions of Jellyfish", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A study at a liberal arts college attempted to find out whether men and women watch the same amount of TV, on average and whether it mattered if students were varsity athletes or not. Student researchers asked 200 randomly selected students questions about their backgrounds and about their television-viewing habits and received 197 legitimate responses. The researchers found that men watch, on average, about 2.5 hours per week more TV than women, and that varsity athletes watch about 3.5 hours per week more than those who are not varsity athletes. But is this the whole story? To investigate further, they divided the students into four groups: male athletes (MA), male non-athletes (MNA), female\nathletes (FA), and female non-athletes (FNA).", - "download": "https://dasl.datadescription.com/download/data/3504", - "filename": "TV-watching", - "name": "TV watching", + "description": "A study was conducted concerning the counts of lesions produced on membranes of chick embryos by viruses of the pox group. The data give the numbers of lesions formed at a series of dilutions of the viral medium. 
\n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDilution\n\nDilution of viral medium, from 1 to 32\n\nCount\n\nNumber of lesions\n", + "download": "http://www.statsci.org/data/general/pocklesi.txt", + "filename": "pocklesi", + "name": "Pock Lesions on Chick Embryos", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Insurance companies and other organizations use actuarial tables to estimate the remaining lifespans of their customers. The data file gives estimated life expectancy and additional years of life for black males in the United States, according to a 2016 National Vital Statistics Report, A regression model to predict Life expectancy from Age appears to fit well, but consider the residuals.", - "download": "https://dasl.datadescription.com/download/data/3542", - "filename": "Years-to-live", - "name": "Years to live 2016", + "description": "This data comes from an experiment on induction of flowering of cyclamen. Plants of 4 varieties of cyclamen were subject to a combination of 6 temperature regimens and 4 levels of fertilization. The temperature regimens are combinations of five temperatures during the day (14, 16, 18, 20 and 26 degrees C) and four temperatures during the night (14, 16, 18 and 20 C). Not all the combinations of temperatures are present. The response is the number of flowers, which vary from 4 to 26, with mode 8. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nVariety\n\nVariety of cyclamen\n\nRegimem\n\nTemperature regimen (combination of the temperature during the day and the temperature during the night)\n\nDay\n\nTemperature during the day (Centigrade)\n\nNight\n\nTemperature during the night\n\nFertilizer\n\nLevel of fertilization\n\nFlowers\n\nNumber of flowers\n", + "download": "http://www.statsci.org/data/general/cyclamen.txt", + "filename": "cyclamen", + "name": "Number of Cyclamen Flowers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Fortune magazine collected the zodiac signs of 256 heads of the largest 400 companies. The data shows the number of births for each sign. ", - "download": "https://dasl.datadescription.com/download/data/3547", - "filename": "Zodiac", - "name": "Zodiac", + "description": "In an experiment where pregnant mice were exposed to the herbicide 2,4,5-T (the active component in Agent Orange), the number of fetal implants in utero were recorded. The data give the frequency distribution of implants at each of seven dose levels measured in mg/kg of body weight. \nOn days 6-14 after mating, pregnant dams were dosed by gavage with one of the doses of 2,4,5-T. Prior to giving birth, the dams were sacrificed and the number of viable, dead and reabsorbed foetuses in the uterus of the dam were determined. The data here gives the number of surviving viable implants. An outcome of zero implants cannot be distinguished from a non-pregnant outcome so any zero implant outcomes were excluded. 
\n\n\n\n\nVariable\n\nDescriptions\n\n\n\n\nDose\n\nDose of 2,4,5-T in mg/kg/day\n\nImplants\n\nNumber of surviving implants\n\nFrequency\n\nNumber of mice with that number of implants\n", + "download": "http://www.statsci.org/data/general/fetaimpl.txt", + "filename": "fetaimpl", + "name": "Fetal Implants in Mice Utero", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Animals" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "For each of the forty largest countries in the world (according to 1990\npopulation figures), data are given for the country's life expectancy\nat birth, number of people per television set, and number of people per\nphysician.", - "download": "http://jse.amstat.org/datasets/televisions.dat.txt", - "filename": "televisions", - "name": "Televisions, Physicians, and Life Expectancy", + "description": "Risk and Sammarco (1991) found that the density of the Great Barrier Reef coral Porites lobata increases with distance from the Australian shore, due to differences between inshore and offshore environments. They made three measurements at each of nine reefs at various distances from the shore. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nReef\n\nName of reef\n\nDistance\n\nDistance to shore (km)\n\nDensity\n\nCoral head density (g/cm3)\n", + "download": "http://www.statsci.org/data/oz/coralden.txt", + "filename": "coralden", + "name": "Density of Great Barrier Reef Coral Heads", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The file ch12.dat contains the following variables:\n\nlstay: Length of stay of a resident\nage: Age of a resident\ntrt: Nursing home assignment (1: receive treament,0: control)\ngender: Gender (1:male,0:female)\nmarstat: Marital status (1: married,0: not married)\nhlstat: Health status (2: second best, 5: worst)\ncens: Censoring indicator (1:censored, 0: discharged)", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch12.dat", - "filename": "Nursing-Home-Usage", - "name": "\nParametric Duration Analysis of Nursing Home Usage", + "description": "The data give the volume (cubic feet), height (feet) and diameter (inches) (at 54 inches above ground) for a sample of 31 black cherry trees in the Allegheny National Forest, Pennsylvania. The data were collected in order to find an estimate for the volume of a tree (and therefore the timber yield), given its height and diameter. 
", + "download": "http://www.statsci.org/data/general/cherry.txt", + "filename": "cherry", + "name": "Volume of Black Cherry Trees", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "This data set was derived from sample survey data collected in 1988\nin two surveys designed to evaluate the City of Toronto Workplace\nSmoking By-law (National Health Research and Development Program,\nCanada, Project Grant 6606-3346-46). The principal investigator\nwas Dr. L.L. Pederson, University of Western Ontario, Ontario,\nCanada. The surveys were conducted by the Institute for Social\nResearch at York University, Ontario, Canada in January-February\n1988 and in November-December 1988. By agreement with the\nInstitute for Social Research, York University, the survey data are\nin the public domain. This data set can be used freely for\nnoncommercial purposes and can be freely distributed.\n\nThere are 15 variables in the data set, with values separated by\nblanks. There are no missing values. The CSB variable names are as\nfollows: \n\nidno y w x1 x2 x3 z1 z2 z3 z4 z5 z6 z7 z8 z9\n\n\nSHORT DESCRIPTION NAME DEFINITION AND CODING\n\nUnique identifier idno (5 digits, beginning with 1 or 2)\n\nOutcome y Attitude toward smoking in the\n workplace. 
Smoking should be: \n (1 = prohibited, 2 = restricted,\n 0 = unrestricted)\n\nWeight w Sampling/post-stratification weight\n (ranges from 0.305 to 4.494)\n\nTime x1 Time of survey relative to\n implementation of the by-law \n on March 1, 1988\n (1 = post, 0 = pre)\n\nWork x2 Place of work indicator 1\n with City of Toronto as baseline\n (1 = outside City of Toronto,\n 0 = otherwise)\n\n x3 Place of work indicator 2\n with City of Toronto as baseline\n (1 = not outside the home, \n 0 = otherwise)\n\nResidence z1 Place of residence\n (1 = City of Toronto, \n 0 = other Metro Toronto)\n\nSmoking z2 Smoking status indicator 1\n with those who have never smoked \n as the baseline\n (1 = current smoker, \n 0 = otherwise)\n\n z3 Smoking status indicator 2\n with never as the baseline\n (1 = quit <=6 months ago, \n 0 = otherwise)\n\n z4 Smoking status indicator 3\n with never as the baseline\n (1 = quit >6 months ago, \n 0 = otherwise)\n\n z5 Smoking status indicator 4\n with quit >12 months as the baseline\n (1 = quit 6-12 months, \n 0 = otherwise)\n\nKnowledge z6 Knowledge of health effects of\n environmental tobacco smoke\n (score, ranges from 0 to 12)\n\nSex z7 Sex of respondent\n (1 = male, 0 = female)\nAge z8 Age of respondent\n ( (age in years - 50)/10 )\n\nEducation z9 Level of education\n (-2 = elementary, \n -1 = some high school, \n 0 = high or trade school, \n 1 = college or some university,\n 2 = university degree)\n ", - "download": "http://lib.stat.cmu.edu/datasets/csb/ch13.dat", - "filename": "Smoking-Restrictions", - "name": "Analysis of Attitudes Towards Workplace Smoking Restrictions", + "description": "The data is from a dew-retting experiment in Ballarat 1942-43, in which flax was laid out under various climactic conditions and for various periods. Retting involves softening the flax stems by soaking in water, thus enabling the separation of the linen fibres from the wooden material by a process called scrutching. 
The flax variety used was \"Liral Crown\". Two samples were taken from each trial and the ret loss, as a percentage, was calculated. The other three variables are the mean daily rainfall (in points), the retting period (in days) and the mean daily temperature (in degrees Fahrenheit).", + "download": "http://www.statsci.org/data/oz/retloss.txt", + "filename": "retloss", + "name": "Ret Loss in Flax", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Population" - }, - { - "datasets": [ + "use_first_row_for_vectorname": true + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data were collected as part of a time study for Telecom, now known as Telstra. The purpose if the study was to model the total hours worked in a section of Telecom in terms of the counts of various tasks. It was hoped that such a model could be used to predict hours worked and hence staffing requirements in changing circumstances. The number of hours worked by employees in a fault reporting centre were recorded, together with the number of faults of each type which were recorded. \nEmployees often work on a flexitime system which allows them to build up time and to leave early every second Friday. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nNumber of hours worked\n\nByDa\n\nNumber of talks of a certain type\n\nPR\n\n\n\nRWT\n\nA type of fault variable \n\nFault\n\n\n\nSOA\n\nNumber of service orders of type A \n\nSOB\n\nNumber of service orders of type B \n\nSOC\n\nNumber of service orders of type C \n\nCable\n\n\n\nField\n\nField call \n\nHot\n\nHotline \n\nREST\n\n\n\nSpec\n\n\n\nApp\n\n\n\nProb\n\n\n\nSC\n\n\n\nHO\n\n\n\nMO\n\n\n\nDay\n\nDay of the week: 1-Monday, 2-Tuesday, 3-Wednesday, 4-Thursday, 5-Friday \n", - "download": "http://www.statsci.org/data/oz/telecom.txt", - "filename": "telecom", - "name": "Telecom Work Measurement Study", + "description": "A production plant cost-control engineer is responsible for cost reduction. One of the costly items in his plant is the amount of water used by the production facilities each month. He decided to investigate water usage by collecting seventeen observations on his plant's water usage and other variables. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nTemperature\n\nAverage monthly temperature (F)\n\nProduction\n\nAmount of production (M pounds)\n\nDays\n\nNumber of plant operating days in the month\n\nPersons\n\nNumber of persons on the monthly plant payroll\n\nWater\n\nMonthly water usage (gallons)\n", + "download": "http://www.statsci.org/data/general/water.txt", + "filename": "water_", + "name": "Water Usage of Production Plant", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "An industrial Taguchi experiment was performed to study the influence of several controllable factors on the mean value and the variation in the percentage of shrinkage of products made by injection moulding. For studying the variation, three noise factors were also included in the design. 
All factors were set at two levels. \nThe problem is a `nominal-is-best' problem where the aim is to reach a certain tartet for the percentage shrinkage, at the same time having as small as variation as possible about the target value. The design that was applied is a so-called Taguchi L8(27)-design with seven controllable factors. At each setting of the controllable factors, the noise factors were varied according to a Taguchi L4(23)-design. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nControllable Factors:\n\nCycle\n\nCycle time\n\nMould\n\nMould temperature\n\nCavity\n\nCavity thickness\n\nPressure\n\nHolding pressure\n\nSpeed\n\nInjection speed\n\nTime\n\nHolding time\n\nGate\n\nGate size\n\nNoise Factors:\n\nRegrind\n\nPercentage regrind\n\nMoisture\n\nMoisture content\n\nTemperature\n\nAmbient temperature\n\nResponse:\n\nShrinkage\n\nPercentage shrinkage\n", - "download": "http://www.statsci.org/data/general/injmould.txt", - "filename": "injmould", - "name": "Injection Moulding Shrinkage", + "description": "Ryan et al (1994) describe the data as follows: \nIn autumn, small winged fruit called samara fall off maple trees, spinning as they go. A forest scientist studied the relationship between how fast they fell and their \"disk loading\" (a quantity based on their size and weight). The samara disk loading is related to the aerodynamics of helicopters. \nThe data give the loadings and fall velocities for fruit from three trees. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTree\n\n1 to 3\n\nLoading\n\nDisk loading\n\nVelocity\n\nFall velocity\n", + "download": "http://www.statsci.org/data/general/samara.txt", + "filename": "samara", + "name": "Fall Velocities for Samara Fruit", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the ambient temperature and the number of primary O-rings damaged for 23 of the 24 space shuttle launches before the launch of the space shuttle Challenger on January 20, 1986. (Challenger was the 25th shuttle. One engine was lost at sea and could not be examined.) Each space shuttle contains 6 primary O-rings. \nThe forecast temperate of the launching day of the Challenger was 31 degrees F. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTemp\n\nAmbient temperature\n\nDamaged\n\nNumber of O-rings damaged\n", - "download": "http://www.statsci.org/data/general/challenger.txt", - "filename": "challenger", - "name": "Space Shuttle Challenger", + "description": "The yield of pasture regrowth was measured together with the number of days since last grazing. The measurements were done on different experimental units so it is reasonable to assume the errors independent. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDays\n\nDays since last grazing\n\nYield\n\nYield of pasture\n", + "download": "http://www.statsci.org/data/general/regrowth.txt", + "filename": "regrowth", + "name": "Pasture Regrowth after Grazing", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data consist of failures of a piece of electronic equipment operating in two modes. For each operating period, Mode1 is the time spent operating in one mode and Mode2 is the time spent operating in the other. The total number of failures recorded in each period is recorded. \n \n\n\n \nVariable \n \nDescription\n \n\n\n \nMode1 \n \nTime in operating mode 1\n \nMode2 \n \nTime in operating mode 2\n \nFailures\n \nNumber of failures\n \n\n\n", - "download": "http://www.statsci.org/data/general/twomodes.txt", - "filename": "twomodes", - "name": "Failures of Electronic Equipment", + "description": "Herbicide bioassay is concerned with the reduction in plant growth as a function of the herbicide dose applied. This is of interest when developing new herbicides, assessing environmental effects on non-target species or estimating the residual herbicides in a treated soil before planting a new, herbicide susceptible crop. A typical experiment would comprise a series of doses ranging from ineffective to severely damaging to establish a dose-response relationship. In this experiment the callus area of a tissue culture of Brassica napus was measured corresponding to different doses of a sulfonylurea herbicide, metsulfuron methyl. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nChlorsulfuron\n\nConcentration of herbicide in nmol/L\n\nCallus\n\nLogarithm of callus area\n", + "download": "http://www.statsci.org/data/general/brassica.txt", + "filename": "brassica", + "name": "Response of Brassica napus to Chlorsulfuron", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Sanford Weisberg writes \nWhen gasoline is pumped into a tank, hydrocarbon vapors are forced out of a tank and into the atmosphere. To reduce this significant source of air pollution, devices are installed to capture the vapor. In testing these vapor recovery systems, the amount that escapes cannot be measured, but a \"sniffer\" can determine if some vapor is escaping. Also, the amount that is recovered can be measured. To estimate the efficiency of the system, some method of estimating the total amount given off must be used. To this end, a laboratory experiment was conducted in which the amount of vapor given off was measured under carefully controlled conditions. Four variables are relevant for modeling. In an experiment, these conditions were varied and the quantity of emitted hydrocarbons was measured in grams. \n\n\n \n \nVariable\n \nDescription\n\n\n\n\n \n \nTankTemp\n - \ninitial tank temperature (�F)\n\n\nGasTemp\n - \ntemperature of the dispensed gasoline (�F)\n\n\nTankPres\n - \ninitial vapor pressure in the tank (psi)\n\n\nGasPres\n - \nvapor pressure of the dispensed gasoline (psi)\n\n\nHC\n - \nemitted hydrocarbons (g)\n\n\n", - "download": "http://www.statsci.org/data/general/gasvapor.txt", - "filename": "gasvapor", - "name": "Sniffing for Hydrocarbon Vapour", + "description": "The data concerns the underground root system of eight separate apple trees. 
Three different root stocks are considered (Mark, MM106 and M26) and two plant spacing (4x2 meters and 5x3 meters). For each plant, soil core sampling units taken have been classified as belonging to an inner or outer zone. The response variable is the density of fine roots, also called the root length density, which can have zeros as well as continuous positive values. There are 511 observations, of which 193 or 38% have a zero response. \nThe design is not a full factorial design: plants 1 and 2 are tested only with the Mark root stock and at a spacing of 5x3; plants 3 and 4 are tested only with Mark root stock at a spacing of 4x2; plants 5 and 6 are tested only with root stock MM106 at a spacing of 5x3; and plants 7 and 8 are tested only with M26 root stock at a spacing of 4x2. The Mark root stock is tested at both plant spacings but the MM106 only at 5x3 and M26 only at 4x2. So there are four unique treatment combinations: Mark stock at 5x3 and 4x2, MM106 at 5x3, and M26 at 4x2. \nIt is of interest to (1) compare effects of spacing within Mark rootstock, (2) compare root stocks within same spacing and (3) to look for any difference in RLD between inner and outer zones. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nPlant \n\n1 to 8\n\nStock\n\nRoot stock: Mark, MM106 or M26\n\nSpacing\n\nPlant spacing: 5x3 or 4x2 meters\n\nZone\n\nZone relative to the plant the soil core is taken from: Inner or Outer\n\nRLD\n\nRoot length density in cm/cm3\n", + "download": "http://www.statsci.org/data/oz/fineroot.txt", + "filename": "fineroot", + "name": "Root Length Density of Apple Trees", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Plants" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "https://en.wikipedia.org/wiki/List_of_U.S._states_by_electricity_production_from_renewable_sources", - "download": "https://dasl.datadescription.com/download/data/3051", - "filename": "Alternative-energy", - "name": "Alternative energy 2016", + "description": "Data were collected from a mine in Cobar, NSW, Australia. At each of 38 sampling points, several measurements were taken, one of which is the 'true-width' of an ore-bearing rock layer. Also given are the co-ordinates t1 and t2 of the data sites. Green and Silverman (1994) use this data set to illustrate thin-plate splines for fitting a smooth surface.", + "download": "http://www.statsci.org/data/oz/ore.txt", + "filename": "ore", + "name": "Width of Ore-Bearing Layer", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In a statement to a Senate Public Works Committee, a senior executive of Texaco, Inc., cited a study on the effectiveness of auto filters on reducing noise. 
Because of concerns about performance, two types of filters were studied, a standard silencer and a new device developed by the Associated Octel Company. Noise is in decibels/10. […] ", - "download": "https://dasl.datadescription.com/download/data/3058", - "filename": "Auto-noise-filters", - "name": "Auto noise filters", + "description": "The proportions of sand, silt and clay in soil samples are given for 8 contiguous sites. The sites extended over the crest and flank of a low rise in a valley underlain by marl near Albudeite in the province of Murcia, Spain. The sites were small areas of ground surface of uniform shape internally and delimited by relative discontinuities externally. Soil samples were obtained for each site at 11 random points within a 10m by 10m area centred on the mid-point of the site. All samples were taken from the same depth. The data give the sand, silt and clay content of each sample, expressed as a percentage of the total sand, silt and clay content. \nThe purpose of the study by Wright and Wilson (1979) was to determine whether the sites could be differentiated on the basis of their soil composition. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSite \n\n1-8\n\nSand \n\nPercent sand\n\nSilt\n\nPercent silt\n\nClay\n\nPercent clay\n", + "download": "http://www.statsci.org/data/general/murcia.txt", + "filename": "murcia", + "name": "Composition of Soil from Murcia Province, Spain", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Geology" + } + ] + }, + { + "category_name": "Statistics", + "subcategories": [ + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A student experiment was run to test the performance of 4 brands of batteries under 2 different Environments (room temperature and cold). 
For each of the 8 treatments, 2 batteries of a particular brand were put into a flashlight. The flashlight was then turned on and allowed to run until the light went out. The […] ", - "download": "https://dasl.datadescription.com/download/data/3070", - "filename": "Batteries", - "name": "Batteries", + "description": "The data give the year of founding for 40 New Zealand wineries.", + "download": "http://www.statsci.org/data/oz/wineries.txt", + "filename": "wineries", + "name": "Founding Dates of NZ Wineries", "number_format": 31, "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, + "separator": "auto", + "simplify_whitespaces": true, + "skip_empty_parts": false, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Stopping distances in feet for a car tested 3 times at each of 5 speeds. We hope to create a model that predicts Stopping Distance from the Speed of the car. ", - "download": "https://dasl.datadescription.com/download/data/3086", - "filename": "Brakes", - "name": "Brakes", + "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. ", + "download": "http://www.statsci.org/data/general/auction.txt", + "filename": "auction", + "name": "Selling Price of Antique Grandfather Clocks", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Measurements on 38 1978-79 model automobiles. Gas mileage in miles per gallon as measured by Consumers’ Union on a test track. Other values as reported by automobile manufacturer. Used to illustrate regression model building and diagnosis. 
Be sure to check the residuals when predicting MPG. ", - "download": "https://dasl.datadescription.com/download/data/3096", - "filename": "Cars", - "name": "Cars", + "description": "The two columns of the data are the prices and year purchased for 124 Mazda cars, as taken from the classified section of the Melbourne Age during the course of 1991. Hence the age of the car at the time can be calculated and used to model car price. ", + "download": "http://www.statsci.org/data/oz/mazdas.txt", + "filename": "mazdas", + "name": "Age and Price of Mazda Cars", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A start-up company has developed an improved electronic chip for use in laboratory equipment. The company needs to project the manufacturing cost, so it develops a spreadsheet model that takes into account the purchase of production equipment, overhead, raw materials, depreciation, maintenance, and other business costs. The spreadsheet estimates the cost of producing 10,000 to […] ", - "download": "https://dasl.datadescription.com/download/data/3109", - "filename": "Chips", - "name": "Chips", + "description": "The data show the capital value and annual rental value of 96 domestic properties in Auckland in 1991. 
The aim was to explore their relationship in the hope of being able to predict capital value from rental value, thus the latter is the explanatory variable in this case.", + "download": "http://www.statsci.org/data/oz/rentcap.txt", + "filename": "rentcap", + "name": "Capital and Rental Values of Auckland Properties", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3126", - "filename": "Computer-chip", - "name": "Computer chip manufacturing", + "description": "The data give the selling price at auction of 32 antique grandfather clocks. Also recorded is the age of the clock and the number of people who made a bid. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nAge\n\nAge of the clock (years)\n\nBidders\n\nNumber of individuals participating in the bidding\n\nPrice\n\nSelling price (pounds sterling)\n", + "download": "http://www.statsci.org/data/general/auction.txt", + "filename": "auction_", + "name": "Selling Price of Antique Grandfather Clocks", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Dalia collects data via smartphone from users worldwide. This survey asked (among many other questions) about access to cars and the use of ride-hailing apps ", - "download": "https://dasl.datadescription.com/download/data/3153", - "filename": "Dalia", - "name": "Dalia", + "description": "The data were collected to study the variation in rent paid in 1977 for agricultural land planted to alfalfa in Minnesota. 
The data include: \n\n\nVariable\n\nDescription\n\n\nRent\n \naverage rent per acre planted to alfalfa\nAllRent\n \naverage rent paid for all tillable land\nCows\n \ndensity of dairy cows (number per square mile)\nPasture\n \nproportion of farmland used as pasture\nLiming\n \nYes if liming is required to grow alfalfa; No otherwise\n", + "download": "http://www.statsci.org/data/general/landrent.txt", + "filename": "landrent", + "name": "Rent for Land Planted to Alfalfa", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Disk drive capacity is often given in terabytes (TB), where 1 TB = 1000 gigabytes, or about a trillion bytes. A search of prices for external disk drives on Amazon.com in mid-2016 found the data on capacity and price. ", - "download": "https://dasl.datadescription.com/download/data/3167", - "filename": "Disk-drives", - "name": "Disk drives 2016", + "description": "Monthly observations on various share price and financial variables were recorded from October 1991 to August 1997. Data collected by Francine Pritchard and Glen Dixon for their MS305 data analysis project in 1997. 
\n\n\nVariable\n\nDescription\n\n\nBank\n\nShare Price Index\nAllOrds\n\n\nDevelop\n\n\nMining\n\n\nGold\n\n\nBuild\n\n\nProp\n\n\nIndust\n\n\nEnergy\n\n\nFinance\n\n\nResource\n\n\nTransport\n\n\nRetail\n\n\nUnemploy\n\nUnemployment Rate\nCPI\n\nConsumer Price Index\nBankBill\n\n90 Day Bank Bill Interest Rate\n", + "download": "http://www.statsci.org/data/oz/bankbill.txt", + "filename": "bankbill", + "name": "90 Day Bank Bills", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Most water tanks have a drain plug so that the tank may be emptied when it’s to be moved or repaired. How long it takes a certain size of tank to drain depends on the size of the plug, as shown in the table. ", - "download": "https://dasl.datadescription.com/download/data/3175", - "filename": "Down-the-Drain", - "name": "Down the Drain", + "description": "The following data was collected in the 1960s at a house in south-east England. The weekly gas consumption (in 1000 cubic feet) and the average outside temperature (in degrees Celsius) was recorded for 26 weeks before and 30 weeks after cavity-wall insulation had been installed. The house thermostat was set at 20°C throughout. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nInsulate\n\nBefore or After\n\nTemp\n\nAverage outside temperature (C)\n\nGas\n\nGas consumption (1000's of cubic feet)\n", + "download": "http://www.statsci.org/data/general/insulgas.txt", + "filename": "insulgas", + "name": "House Insulation and Gas Consumption", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A university teacher saved every e-mail receive from students in a large introductory statistics class during one term. He then counted, for each student who had sent him at least one e-mail, how many e-mails each student had sent. What is the distribution of e-mail communications? ", - "download": "https://dasl.datadescription.com/download/data/3181", - "filename": "E-mails", - "name": "E-mails", + "description": "Age specific term life premium rates for a sum insured of $50,000 are given in the table. The first column is the age of insured, the next two columns are the rates for male smokers and non-smokers, and the last two columns are the rates for female smokers and non-smokers. The four separate sets of points may be plotted and cubic spline regression used to fit them.", + "download": "http://www.statsci.org/data/oz/insure.txt", + "filename": "insure", + "name": "Insurance Premiums", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Fuel economy (mpg) and the number of cylinders in a sample of cars. Data extracted from a larger cars dataset. 
", - "download": "https://dasl.datadescription.com/download/data/3226", - "filename": "Fuel-economy-and-cylinders", - "name": "Fuel economy and cylinders", + "description": "The data give the Canadian automobile insurance experience for policy years 1956 and 1957 as of June 30, 1959. The data includes virtually every insurance company operating in Canada and was collated by the Statistical Agency (Canadian Underwriters' Association - Statistical Department) acting under instructions from the Superintendent of Insurance. The data given here is for private passenger automobile liability for non-farmers for all of Canada excluding Saskatchewan. \nThe variable Merit measures the number of years since the last claim on the policy. The variable Class is a collation of age, sex, use and marital status. The variables Insured and Premium are two measures of the risk exposure of the insurance companies. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nMerit\n\nMerit Rating:\n3 - licensed and accident free 3 or more years\n2 - licensed and accident free 2 years\n1 - licensed and accident free 1 year\n0 - all others\n\nClass\n\n1 - pleasure, no male operator under 25\n2 - pleasure, non-principal male operator under 25\n3 - business use\n4 - unmarried owner or principal operator under 25\n5 - married owner or principal operator under 25\n\nInsured\n\nEarned car years\n\nPremium\n\nEarned premium in 1000's\n(adjusted to what the premium would have been had all cars been written at 01 rates)\n\nClaims\n\nNumber of claims\n\nCost\n\nTotal cost of the claim in 1000's of dollars\n", + "download": "http://www.statsci.org/data/general/carinsca.txt", + "filename": "carinsca", + "name": "Canadian Automobile Insurance Claims for 1957-1958", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - 
"description": "An experiment to test a new gasoline additive, Gasplus, was performed on three different cars: a sports car, a minivan, and a hybrid. Each car was tested with both Gasplus and regular gas on 10 different occasions and their gas mileage was recorded.", - "download": "https://dasl.datadescription.com/download/data/3231", - "filename": "Gas-additives", - "name": "Gas additives", + "description": "The data give details of third party motor insurance claims in Sweden for the year 1977. \n\"In Sweden all motor insurance companies apply identical risk arguments to classify customers, and thus their portfolios and their claims statistics can be combined. The data were compiled by a Swedish Committee on the Analysis of Risk Premium in Motor Insurance. The Committee was asked to look into the problem of analyzing the real influence on claims of the risk arguments and to compare this structure with the actual tariff.\" \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nKilometres \n\nKilometres travelled per year\n1: < 1000\n2: 1000-15000\n3: 15000-20000\n4: 20000-25000\n5: > 25000\n\nZone \n\nGeographical zone\n1: Stockholm, Göteborg, Malmö with surroundings\n2: Other large cities with surroundings\n3: Smaller cities with surroundings in southern Sweden\n4: Rural areas in southern Sweden\n5: Smaller cities with surroundings in northern Sweden\n6: Rural areas in northern Sweden\n7: Gotland \n\nBonus\n\nNo claims bonus. Equal to the number of years, plus one, since last claim\n\nMake\n\n1-8 represent eight different common car models. 
All other models are combined in class 9\n\nInsured\n\nNumber of insured in policy-years\n\nClaims\n\nNumber of claims\n\nPayment\n\nTotal value of payments in Skr\n\n\n\n", + "download": "http://www.statsci.org/data/general/motorins.txt", + "filename": "motorins", + "name": "Third Party Motor Insurance in Sweden", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Internet users 2014", - "download": "https://dasl.datadescription.com/download/data/3299", - "filename": "Internet-users", - "name": "Internet users 2014", + "description": "The data give the average claims for damage to the owner's car for privately owned and comprehensively insured vehicles in Britain in 1975. Averages are given in pounds sterling adjusted for inflation. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nOwnerAge\n\nPolicy-holder's age in years, categorized into 8 levels\n\nModel\n\nType of car, in 4 groups\n\nCarAge\n\nVehicle age in years, categorized into 4 levels\n\nNClaims\n\nNumber of claims\n\nAveCost\n\nAverage cost of each claim in pounds\n", + "download": "http://www.statsci.org/data/general/carinsuk.txt", + "filename": "carinsuk", + "name": "British Car Insurance Claims for 1975", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "iPod failures", - "download": "https://dasl.datadescription.com/download/data/3300", - "filename": "iPod-failures", - "name": "iPod failures", + "description": "Monthly data relating to hotels, motels and guesthouses in Victoria, from January 1980 to June 1995. 
First column: total number of room nights occupied; Second column: total takings from accommodation. ", + "download": "http://www.statsci.org/data/oz/motel.txt", + "filename": "motel", + "name": "Hotels, Motels and Guesthouses in Victoria", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Richard DeVeaux owned a Nissan Maxima for 8 years. He\nrecorded the car’s fuel efficiency (in mpg) each time he filled the tank. He wanted to know what fuel efficiency to expect as “ordinary” for his car. Knowing this, he was able to predict when he’d need to fill the tank again and to notice if the fuel efficiency suddenly got worse, which could be a sign of trouble.", - "download": "https://dasl.datadescription.com/download/data/3367", - "filename": "Nissan", - "name": "Nissan", + "description": "The data list the CPI (Consumer Price Index) figures for five countries from 1985 to 1994. The countries are Australia, Canada, New Zealand, the United Kingdom and the United States. Each index is based on the December Quarter 1993 (1000).", + "download": "http://www.statsci.org/data/oz/cpifive.txt", + "filename": "cpifive", + "name": "CPI for Five Countries", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Costs of construction for 32 light water nuclear plants.", - "download": "https://dasl.datadescription.com/download/data/3554", - "filename": "Nuclear-plants", - "name": "Nuclear plants", + "description": "Quarterly CPI indices for Brisbane for food, clothing, housing etc, from June 1972 to September 1997. 
\nThe groups are: Food, Clothing, Housing, Household equipment and operation, Transportation, Tobacco and Alcohol, Health and personal care, Recreation and education, and All groups. The CPI are standardized so that the year 1989-90 is 100.0. ", + "download": "http://www.statsci.org/data/oz/cpibris.txt", + "filename": "cpibris", + "name": "Brisbane Consumer Price Indices", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Pew Research conducted a survey about social networking in several countries. They asked whether respondents had access to and used social networking. Responses were “yes” (use social networking), “no”, and “not available”.", - "download": "https://dasl.datadescription.com/download/data/3457", - "filename": "Social-networking", - "name": "Social networking", + "description": "Quarterly price indices for established homes in Australian capital cities, from June 1986 to June 1997. The price indices are standardized so that the year 1989-1990 is 100.0 for each city. ", + "download": "http://www.statsci.org/data/oz/houses.txt", + "filename": "houses", + "name": "House Price Indexes", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Economics" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Cnet.com tests tablet computers and continuously updates its list. As of January 2014, the list included the battery life (in hours) and luminous intensity (i.e., screen brightness, in cd/m^2). 
", - "download": "https://dasl.datadescription.com/download/data/3474", - "filename": "Tablet-computers-2014", - "name": "Tablet computers 2014", + "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. (The intention when the rules were changed was almost certainly to make the play more continuous.) ", + "download": "http://www.statsci.org/data/oz/rugby.txt", + "filename": "rugby", + "name": "Time of Passages of Play in Rugby", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Should you generate electricity with your own personal\nwind turbine? That depends on whether you have enough\nwind on your site. 
To produce enough energy, your site should\nhave an annual average wind speed above 8 miles per hour, according\nto the Wind Energy Association. One candidate site was\nmonitored for a year, with wind speeds recorded every 6 hours.\nA total of 1114 readings of wind speed averaged 8.019 mph with\na standard deviation of 3.813 mph. The data are provided.", - "download": "https://dasl.datadescription.com/download/data/3527", - "filename": "Wind-power", - "name": "Wind power", + "description": "Mark Taylor was Captain of the Australian test cricket team from May 1994 until February 1999. By the middle of 1997, the Australian team has won its 7 consecutive international test series, making Taylor the most successful Australian Captain in history. However his poor batting form from mid 1996 to mid 1997 gave the Australian selectors a dilemma in deciding whether his excellent Captaincy made up for the run of poor scores off his own bat. \nThe data below gives Mark Taylor's test scores from the middle of 1989 to the middle of 1995, a period over which he was batting well. Scores were made in Australia's first or second innings of each match. Sometimes Australia was not required to bat twice, in which case the second innings is marked as missing. 
There are also a number of `not outs'.", + "download": "http://www.statsci.org/data/oz/taylor.txt", + "filename": "taylor_", + "name": "Mark Taylor's Test Cricket Scores", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data set gives a random sample of the length of visits of users entering the msnbc.com web site during September 28, 1999.\nThe length of the visit is an estimate of the total number of clicks or pages seen by each user and is based on web server \nlogs, thus it counts only pages recorded by the server. Pages cached in the user's browser or in a cache proxy server are \nunknown. The data set used in the paper is much larger than the one made available here but that larger data set is also \navailable in a page cited in the references. ", - "download": "http://jse.amstat.org/datasets/msnbclength.dat.txt", - "filename": "msnbclength", - "name": "Internet Data Analysis for Undergrad Curriculum", + "description": "The Sydney-Hobart yacht race starts from Sydney Harbour on Boxing day (December 26) and finishes several days later in Hobart. It is a 630 nautical mile ocean race. The data give the winning times from 1945 to 1993, as they appeared in the Sydney Morning Herald on 24 December, 1994, plus the winning times for 1994 to 1997. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYacht\n\nName of winning yacht\n\nYear\n\nYear\n\nDays\n\nDays unit of winning time\n\nHours\n\nHours unit of winning time\n\nMinutes\n\nMinutes unit of winning time\n\nTime\n\nWinning time in minutes (should match time in Days, Hours and Minutes)\n", + "download": "http://www.statsci.org/data/oz/sydhob.txt", + "filename": "sydhob", + "name": "Sydney to Hobart Yacht Race Winning Times", "number_format": 31, "remove_quotes": true, - "separator": ",", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The video lottery terminal dataset contains observations on the three\nwindows of an electronic slot machine for 345 plays together with the\nprize paid out for each play. The prize payout distribution is so\nbadly skewed that confidence intervals for expected payout based on the\ncentral limit theorem are not accurate. The dataset can be used at the\ngraduate or upper undergraduate level to illustrate parametric\nbootstrapping. The dataset can also be used in a graduate course to\nillustrate tests of independence for two and three-way contingency\ntables involving random zeroes, or these tables may be collapsed and\nused as examples in an introductory course.", - "download": "http://jse.amstat.org/datasets/vlt.dat.txt", - "filename": "vlt_", - "name": "Video Lottery Terminal Data", + "description": "Winning heights or distances (inches) for the High Jump, Discus and Long Jump events at the Olympics up to 1996. 
", + "download": "http://www.statsci.org/data/general/olympic.txt", + "filename": "olympic", + "name": "Olympic Records for High Jump, Discus and Long Jump", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Technology" - }, - { - "datasets": [ + "use_first_row_for_vectorname": true + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The Pew Research Center conducted a representative telephone survey in October of 2016. Among the reported results was the following table concerning the preferred political party affiliation of respondents and their ages for white voters. Is there evidence of age-based differences in party affiliation in the United States for white voters? ", - "download": "https://dasl.datadescription.com/download/data/3045", - "filename": "Age-and-party", - "name": "Age and party 2016", + "description": "The data give the number of medals won by each medal-winning country in the 1992 Summer Olympic Games in Barcelona, Spain, and the 1994 Winter Olympic Games in Lillehammer, Norway. Also given is the population and latitude of each country. Griffiths et al write: \n... the media spent a lot of time discussing the number of medals won by each country's athletes. The implication was that the comparison was of some importance. However, larger countries would be expected to win more medals than smaller countries, simply because of their larger populations. \n... some viewers, especially those from the smaller countries, felt that the number of medals should be standardised to account for the very wide range of populations, and that a per capita number of medals for a country was a fairer comparison. 
Others felt that this was unfair to the countries with larger populations - that having twice as many people did not lead to twice as many medals. If standardisation is performed adequately, there should be no systematic relationship between the adjusted medal count and population. \nAlso countries further from the equator might be expected to do better in the winter olympics. \nThe data is incomplete in that countries with no medals are not included. These would be mostly smaller population countries. ", + "download": "http://www.statsci.org/data/oz/medals.txt", + "filename": "medals", + "name": "Olympic Medals", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The outcome of the 2000 U.S. presidential election was determined in Florida amid much\ncontroversy. Even years later, historians continue to debate who really received the most\nvotes. The main race was between George W. Bush and Al Gore, but two minor candidates\nplayed a significant role. To the political right of the major party candidates was Pat\nBuchanan, while to the political left was Ralph Nader. Generally, Nader earned more votes\nthan Buchanan throughout the state. We would expect counties with larger vote totals to\ngive more votes to each candidate. The dataset gives Buchanan’s and Nader’s vote totals by\ncounty in the state of Florida. Plot to identify the outlier and consider what it means.", - "download": "https://dasl.datadescription.com/download/data/3187", - "filename": "Election-2000", - "name": "Election 2000", + "description": "This data set was assembled by Rowan Todd and Mark McNaughton, two students studying Statistics at QUT in a class taught by Dr Margaret Mackisack. 
For a class project they decided to investigate the effect on football game attendance of various covariates. They collected data involving Saturday Australian Football League (AFL) matches at the Melbourne Cricket Ground (MCG). They looked only at matches during the normal home and away season (i.e. not including finals). They used statistics from all such games in 1993 and 1994 (nineteen relevant matches in 1993 and twenty-two in 1994). The response variable measured was attendance at the MCG, and after consideration, they came up with the following covariates: \n\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMCG \n\nAttendance at the MCG in 1000's.\n\nTemp \n\nTemperature. The forecast maximum temperature on the day of the match, in whole degrees C, found in The Weekend Australian.\n\nOther\n\nAttendance at other matches in 1000's. The sum of the attendances at other AFL matches in Melbourne and Geelong on the same day as the match in question.\n\nMembers\n\nMembership. The sum of the memberships of the two clubs whose teams were playing the match in question in 1000's.\n\nTop50\n\nNumber of players from the top fifty. 
The number of players in the top 50 in the AFL who happened to be playing in the match in question.\n\nDate\n\nDate of the match in the format dd/mm/yy.\n\nHome\n\nAbbreviation for home team.\n\nAway\n\nAbbreviation for away team.\n", + "download": "http://www.statsci.org/data/oz/afl.txt", + "filename": "afl", + "name": "AFL Crowd Attendance at the MCG", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3201", - "filename": "Female-president", - "name": "Female president", + "description": "The following description is from Lee (1994): \nRugby football is a popular quasi-amateur sport widely played in the United Kingdom, France, Australia, New Zealand and South Africa. It is rapidly gaining popularity in the US, Canada, Japan and parts of Europe. Recently, some of the rules of the game have been changed, with the aim of making play more exciting. In a study to examine the effects of the rule changes, Hollings and Triggs (1993) collected data on some recent games. \nTypically, a game consists of bursts of activity which terminate when points are scored, if the ball is moved out of the field of play or if an infringement of the rules occurs. In 1992, the investigators gathered data on ten international matches which involved the New Zealand national team, the All Blacks. The first five games studied were the last international games played under the old rules, and the second set of five were the first internationals played under the new rules. \nFor each of the ten games, the data list the successive times (in seconds) of each passage of play in that game. One interest is to see whether the passages were on average longer or shorter under the new rules. 
(The intention when the rules were changed was almost certainly to make the play more continuous.) \n", + "download": "http://www.statsci.org/data/oz/rugby.txt", + "filename": "rugby_", + "name": "Time of Passages of Play in Rugby", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "How accurate are pollsters in predicting the outcomes of Congressional elections? The table shows the actual number of Democrat seats in the House of Representatives and the number predicted by the Gallup organization for nonpresidential election years in the 4 decades following World War II. ", - "download": "https://dasl.datadescription.com/download/data/3564", - "filename": "Polling", - "name": "Polling", + "description": "The data give the record-winning times for 35 hill races in Scotland, as reported by Atkinson (1986). The distance travelled and the height climbed in each race is also given. The data contains a known error - Atkinson (1986) reports that the record for Knock Hill (observation 18) should actually be 18 minutes rather than 78 minutes. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nRace\n\nName of race\n\nDistance\n\nDistance covered in miles\n\nClimb\n\nElevation climbed during race in feet\n\nTime\n\nRecord time for race in minutes\n", + "download": "http://www.statsci.org/data/general/hills.txt", + "filename": "hills_", + "name": "Scottish Hill Races", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "For each U.S. 
Senator, his or her votes on whether to remove President\nClinton on each of the two articles of impeachment (plus a summary\nvariable representing each Senator's number of \"guilty\" votes) are\nprovided, as well as each Senator's values on several variables that\ncould be predictive of vote (e.g., Senator's degree of conservatism,\nhow well Clinton did in the Senator's state in the 1996 Presidential\nelection).", - "download": "http://jse.amstat.org/datasets/impeach.dat.txt", - "filename": "impeach", - "name": " U.S. Senate Votes on Clinton Removal", + "description": "Investigators studied physical characteristics and ability in 13 football punters. Each volunteer punted a football ten times. The investigators recorded the average distance for the ten punts, in feet. They also recorded the average hang time (time the ball is in the air before the receiver catches it) for the ten punts, in seconds. In addition, the investigators recorded five measures of strength and flexibility for each punter: right leg strength (pounds), left leg strength (pounds), right hamstring muscle flexibility (degrees), left hamstring muscle flexibility (degrees), and overall leg strength (foot-pounds). From the study \"The relationship between selected physical performance variables and football punting ability\" by the Department of Health, Physical Education and Recreation at the Virginia Polytechnic Institute and State University, 1983. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in feet\n\nHang\n\nTime in air in seconds\n\nR_Strength\n\nRight leg strength in pounds\n\nL_Strength\n\nLeft leg strength in pounds\n\nR_Flexibility\n\nRight leg flexibility in degrees\n\nL_Flexibility\n\nLeft leg flexibility in degrees\n\nO_Strength\n\nOverall leg strength in pounds\n", + "download": "http://www.statsci.org/data/general/punting.txt", + "filename": "punting", + "name": "American Football Punters", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false + "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data consist of the numbers of days served in office for the 43 \nPresidents of the United States as of 4 February 2004. ", - "download": "http://jse.amstat.org/datasets/outlier.dat.txt", - "filename": "outlier", - "name": "A Dataset That Is 44% Outliers", + "description": "Data on 102 male and 100 female athletes collected at the Australian Institute of Sport, courtesy of Richard Telford and Ross Cunningham. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSport\n\nSport\n\nSex\n\nmale or female\n\nHt\n\nHeight in cm\n\nWt\n\nWeight in kg\n\nLBM\n\nLean body mass\n\nRCC\n\nRed cell count\n\nWCC\n\nWhite cell count\n\nHc\n\nHematocrit\n\nHg\n\nHemoglobin\n\nFerr\n\nPlasma ferritin concentration\n\nBMI\n\nBody mass index = weight/height^2\n\nSSF\n\nSum of skin folds\n\n%Bfat\n\n% body fat\n\n\n\n", + "download": "http://www.statsci.org/data/oz/ais.txt", + "filename": "ais_", + "name": "Australian Institute of Sport", "number_format": 31, "remove_quotes": true, - "separator": "SPACE", + "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, - "use_first_row_for_vectorname": false - } - ], - "subcategory_name": "Politics" - }, - { - "datasets": [ + "use_first_row_for_vectorname": true + }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "1998 Baby data from http://www.nber.org/natality/ftp.cdc.gov/pub/Health_Statistics/NCHS/Dataset_Documentation/DVS/natality/", - "download": "https://dasl.datadescription.com/download/data/3059", - "filename": "Babysamp", - "name": "Babysamp 98", + "description": "The data was collected by Grant Elliott, a statistics student at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. Here is his description of the data and its collection: \nLiving at a squash court spurred on the idea of this experiment. Frustrated playing squash one night, I thought that the squash ball I was playing with seemed to bounce and react differently to what I was previously used to. So I conducted this experiment on the squash ball, looking at the type of ball, temperature of the ball and the age of the ball. \nBall type: In this experiment I used a 'yellow dot' squash ball and a 'double x' squash ball. A 'yellow dot' is super slow and a 'double x' is termed extra super slow. \nTemperature: When playing with a squash ball it tends to heat up. 
So I took it to extremes where I had 'room temperature' and 'playing temperature'. To duplicate 'playing temperature' the ball was placed in a cup of boiling water for 45 sec. \nAge: I expected age to be my most significant factor. Squash balls, being a sealed ball, shouldn't vary when they get older, so I used a new ball and compared it to an old ball. \nProcedure: I first thought of dropping the balls from a set height and seeing how far they bounced against a tape measure. This idea was scrapped as too much error came into it because you couldn't accurately measure when the maximum height of the bounce was. I then thought of a ball machine. I set the ball machine up and measured how far back did the ball come off the front wall when shot out of the ball machine. This eliminated a lot of varying in my figures as the ball machine shoots the balls out at roughly the same speed and trajectory. It doesn't take all the varying out as I wouldn't know whether the ball machine does shoot it out at exactly the same speed, but it keeps variation to a minimum. \nCriticism: Measuring the distance from the wall was done by my friend and I. We both would watch from different angles and would see where the ball landed. This means our figures are probably out by a couple of centimetres. When the balls were dropped into the water I forgot to take some of them out after 45 sec. Also with some I moved them around in the water to get the heat distributed evenly but others I forgot to move as I was collecting and organising the next ball. Another criticism is the temperature of the water. I put new boiling water into the cup after 4 balls had been in it. 
Therefore the last ball to go in wouldn't be the same temperature as the first ball.", + "download": "http://www.statsci.org/data/oz/squash.txt", + "filename": "squash", + "name": "Squash Ball Experiment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Births per 1000 population in the United States, starting in 1965. There has been concern that the birthrate may be declining. A good model for tends in birthrate may allow for some prediction. ", - "download": "https://dasl.datadescription.com/download/data/3075", - "filename": "Birthrates-2015", - "name": "Birthrates 2015", + "description": "The dynamic and repetitive nature of running means that runners are particularly prone to over-use injuries such as lower back pain. Chronic pain is often caused by muscle imbalances, which result in faulty alignment of and abnormal stresses applied to the spinal column. Muscle imbalances originate as adaptations in motor control due to pain or external stimuli, and are then reinforced and preserved by repetition. \nThis study, conducted by Physiotherapy student Andrew Mooney, examined the flexibility of four major muscle groups associated with movement of the hip, with particular attention to imbalances between the left and right sides or between the dominant and non-dominant sides. \nA total of 33 male subjects were included in the study. The subjects were divided into three groups: 11 runners with low back pain, 11 runners without low back pain and 11 sedentary individuals without low back pain. (Runners were recruited from the Ashgrove and Toowong athletics clubs, non runners from the University of Queensland and the general community. Runners with lower back pain were recruited first. 
Once this subject group was tested, subjects for the two control groups were recruited to match the runners with low back pain according to age, height and weight.) \nThe muscle groups examined were \nthe iliopsoas, \nthe rectus femoris, \nthe tensor fascia lata/iliotibial band (ITB/TFL), and \nthe hamstrings \nFor each muscle group, two measures of flexibility were used. The first, relative flexibility, was related to the range of movement of the joint before postural compensations occurred, and the second was a measure of the maximal functional length of the muscle. Relative flexibility and functional length were measured for each muscle group on both the left and right sides of the body. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject \n\nSubject number, 1 - 33\n\nGroup\n\nPain, NoPain or Sedentary\n\nMatch\n\n1 - 11, indicating matched triples\n\nAge\n\nAge of subject in years\n\nHeight\n\nHeight of subject in cm\n\nWeight\n\nWeight of subject in kg\n\nDistance\n\nType of running event: Sprint, middle distance (Mid) or long distance (Long)\n\nYears\n\nNumber of years running\n\nDominant\n\nDominant side, Left or Right\n\nDF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on dominant side in degrees. 
Positive numbers indicate above the horizontal, negative numbers below the horizontal.\n\nDF.Rectus\n\nRelative flexibility of rectus femoris muscle on dominant side in degrees\n\nDF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on dominant side in degrees\n\nDF.Hamstring\n\nRelative flexibility of hamstring muscles on dominant side in degrees\n\nDL.Iliopsoas\n\nFunctional length of iliopsoas muscle on dominant side\n\nDL.Rectus\n\nFunctional length of rectus femoris muscle on dominant side\n\nDL.ITBTFL\n\nFunctional length of ITB/TFL muscle on dominant side\n\nDL.Hamstring\n\nFunctional length of hamstring muscles on dominant side\n\nNF.Iliopsoas\n\nRelative flexibility of iliopsoas muscle on nondominant side in degrees\n\nNF.Rectus\n\nRelative flexibility of rectus femoris muscle on nondominant side in degrees\n\nNF.ITBTFL\n\nRelative flexibility of ITB/TFL muscle on nondominant side in degrees\n\nNF.Hamstring\n\nRelative flexibility of hamstring muscles on nondominant side in degrees\n\nNL.Iliopsoas\n\nFunctional length of iliopsoas muscle on nondominant side\n\nNL.Rectus\n\nFunctional length of rectus femoris muscle on nondominant side\n\nNL.ITBTFL\n\nFunctional length of ITB/TFL muscle on nondominant side\n\nNL.Hamstring\n\nFunctional length of hamstring muscles on nondominant side\n", + "download": "http://www.statsci.org/data/oz/backpain.txt", + "filename": "backpain", + "name": "Runners with Low Back Pain", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In a Chance magazine article (Summer 2005), Danielle Vasilescu and Howard Wainer used data from the United Nations Center for Human Settlements to investigate aspects of living conditions for several countries. 
Among the variables they looked at were the country’s per capita gross domestic product (GDP, in $) and Crowdedness, defined as the average number of persons per room living in homes there.\nVasilescu and Wainer re-express GDP to -10000/GDP. Doing that reveals an outlier that may be due to an error in the data.", - "download": "https://dasl.datadescription.com/download/data/3148", - "filename": "Crowdedness", - "name": "Crowdedness", + "description": "The data come from the 1990 Pilot Surf/Health Study of NSW Water Board. The first column takes values 1 or 2 according to the recruit's perception of whether (s)he is a Frequent OCean Swimmer, the second column has values 1 or 4 according to recruit's usually chosen swimming location (1 for non-beach, 4 for beach), the third column has values 2 (aged 15-19), 3 (aged 20-25), or 4 (aged 25-29), the fourth column has values 1 (male) or 2 (female) and finally, the fifth column has the number of self-diagnosed ear infections that were reported by the recruit.", + "download": "http://www.statsci.org/data/oz/earinf.txt", + "filename": "earinf", + "name": "Ear Infections in Swimmers", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Sport" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3237", - "filename": "GDP-DJIA", - "name": "GDP and DJIA 2017", + "description": "After purchasing a batch of flight helmets that did not fit the heads of many pilots, the NZ Airforce decided to measure the headsizes of all recruits. Before this was carried out, information was collected to determine the feasibility of using cheap cardboard callipers to make the measurements, instead of metal ones which were expensive and uncomfortable. 
The data lists the head diameters of 18 recruits measured once using cardboard callipers and again using metal callipers. One question is whether there is any systematic difference between the two sets of callipers. One might also ask whether there is more variability in the cardboard callipers measurement than that of the metal callipers. ", + "download": "http://www.statsci.org/data/oz/nzhelmet.txt", + "filename": "nzhelmet", + "name": "Helmet Sizes for New Zealand Airforce", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Data for 800 respondents in each of five countries. The variables provide demographic information (sex, age, education, marital status) and responses to questions of interest to marketers on personal finance and purchasing. ", - "download": "https://dasl.datadescription.com/download/data/3242", - "filename": "Global", - "name": "Global", + "description": "These data were collected as part of a project for the Federal Office for Road Safety conducted by the Research Institute of Gender and Health at the University of Newcastle. There is evidence that women drivers who are involved in motor vehicle accidents are more likely than men to be injured. A possible reason is that women often drive smaller cars that provide less protection in a collision. One of the aims of the project was to examine preferences for cars among men and women and investigate the extent to which safety was a factor in determining preferences. \nThe survey was conducted by research assistants who asked people in car parks to participate and administered a structured questionnaire. They were instructed to obtain data from men and women with small, medium and large cars, with 50 people per group for a total of 300 respondents. 
(The sample size was based on power requirements for another part of the survey that involved anthropometric measurements.) The research assistants approached people in car parks of the University of Newcastle and nearby shopping centres during December 1997 and January 1998. \nThe data consist of 300 records each with 22 variables. The variables are: \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nID\n\nIdentification number of respondent\n\nAge\n\nAge of respondent (years)\n\nSex\n\n1=female, 2=male\n\nLicYr\n\nTime they have held a full driving licence, in years and months (years)\n\nLicMth\n\nTime they have held a full driving licence, in years and months (months)\n\nActCar\n\nMake, model and year of car most often driven, coded to size of car 1=small, 2=medium, 3=large\n\nKids5\n\nChildren under five, 1=yes, 2=no\n\nKids6\n\nChildren 6 to 16, 1=yes, 2=no\n\nPrefCar\n\nPreferred car, coded to size of car 1=small, 2=medium, 3=large\n\nCar15k\n\nPreferred type of car if cost $15000, 1=small new car; 2=large second-hand car\n\nReason\n\n1=safety, 2=reliability, 3=cost, 4=performance, 5=comfort, 6=looks\n\nCost\n\nHow important is cost when buying a car? 
1=not important, 2=little importance, 3=important, 4=very important\n\nReliable\n\nHow important is reliability ...?\n\nPerform\n\nHow important is performance ...?\n\nFuel\n\nHow important is fuel consumption ...?\n\nSafety\n\nHow important is safety ...?\n\nAC/PS\n\nHow important is air conditioning/power steering ...?\n\nPark\n\nHow important is ease of parking ...?\n \nRoom\n \nHow important is space/roominess ...?\n \nDoors\n \nHow important is the number of doors ...?\n \nPrestige\n \nHow important is prestige/style ...?\n \nColour\n \nHow important is colour ...?\n", + "download": "http://www.statsci.org/data/oz/carprefs.txt", + "filename": "carprefs", + "name": "Car Preferences", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The dataset gives profits (in $M) for 30 of the 500 largest global corporations (as measured by revenue). ", - "download": "https://dasl.datadescription.com/download/data/3243", - "filename": "Global500-2014", - "name": "Global500 2014", + "description": "Do you use up the same amount of the soap in the shower each morning, or does it depend on the size of the bar of soap? This data was collected by Rex Boggs of Glenmore State High School in Rockhampton, Queensland. Rex writes: \nI had a hypothesis that the daily weight of my bar of soap in my shower wasn't a linear function, the reason being that the tiny little bar of soap at the end of its life seemed to hang around for just about ever. I wanted to throw it out, but I felt I shouldn't do so until it became unusable. And that seemed to take weeks. \nAlso I had recently bought some digital kitchen scales and felt I needed to use them to justify the cost. 
I hypothesised that the daily weight of a bar of soap might be dependent upon surface area, and hence would be a quadratic function. \nI kept records for three weeks (the life of the bar), and was amazed to find that the data was linear with a very high R² value, until the last few days of its life. \nThe data ends at day 22. On day 23 the soap broke into two pieces and one piece went down the plughole ... \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nDate \n\nDate of observation\n\nDay \n\nNumber of days since beginning of experiment\n\nWeight \n\nWeight of soap bar (grams)\n\n\n\n", + "download": "http://www.statsci.org/data/oz/soap.txt", + "filename": "soap", + "name": "Bar of Soap", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In an investigation of environmental causes of disease, data were collected on the annual mortality rate (deaths per 100,000) for males in 61 large towns in England and Wales. In addition, the water hardness was recorded as the calcium concentration (parts per million, ppm) in the drinking water.", - "download": "https://dasl.datadescription.com/download/data/3255", - "filename": "Hard-water", - "name": "Hard water", + "description": "These data are for specimens of 50 varieties of timber, for modulus of rigidity, modulus of elasticity and air dried density, arranged in increasing order of magnitude of the density.
", + "download": "http://www.statsci.org/data/oz/timber.txt", + "filename": "timber", + "name": "Timber Data", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The United Nations Development Programme (UNDP) uses the Human Development Index (HDI) in an attempt to summarize in one number the progress in health, education, and economics of a country. In 2015, the HDI was as high as 0.94 for Norway and as low as 0.35 for Niger. ", - "download": "https://dasl.datadescription.com/download/data/3258", - "filename": "HDI-2015", - "name": "HDI 2015", + "description": "A soft drink bottler is analyzing vending machine service routes in his distribution system. He is interested in predicting the amount of time required by the route driver to service the vending machines in an outlet. This service activity includes stocking the machine with beverage products and minor maintenance or housekeeping. The industrial engineer responsible for the study has suggested that the two most important variables affecting the delivery time are the number of cases of product stocked and the distance walked by the route driver.
The engineer has collected 25 observations on delivery time (minutes), number of cases and distance walked (feet).", + "download": "http://www.statsci.org/data/general/softdrin.txt", + "filename": "softdrin", + "name": "Soft Drink Delivery Times", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The United Nations Development Programme (UNDP) uses the Human Development Index (HDI) in an attempt to summarize in one number the progress in health, education, and economics of a country. ", - "download": "https://dasl.datadescription.com/download/data/3259", - "filename": "HDI-2016", - "name": "HDI 2016", + "description": "Impact strength of insulation cuts in foot-pounds. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nLot\n\nLot of insulating material\n\nCut\n\nLengthwise (Length) or crosswise (Cross)\n\nStrength\n\nImpact strength in foot-pounds\n\n\n\n", + "download": "http://www.statsci.org/data/general/insulate.txt", + "filename": "insulate", + "name": "Impact Strength Of Insulation Cuts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Life expectancy at birth in 195 countries. ", - "download": "https://dasl.datadescription.com/download/data/3312", - "filename": "Life-Expectancy", - "name": "Life Expectancy", + "description": "The data was collected by Stewart Fischer and David Tippetts, statistics students at the Queensland University of Technology in a subject taught by Dr Margaret Mackisack. 
Here is their description of the data and its collection: \nThe experiment decided upon was to see if by using two different designs of paper aeroplane, how far the plane would travel. In considering this, the question arose, whether different types of paper and different angles of release would have any effect on the distance travelled. Knowing that paper aeroplanes are greatly influenced by wind, we had to find a way to eliminate this factor. We decided to perform the experiment in a hallway of the University, where the effects of wind can be controlled to some extent by closing doors. \nIn order to make the experimental units as homogeneous as possible we allocated one person to a task, so person 1 folded and threw all planes, person 2 calculated the random order assignment, measured all the distances, checked that the angles of flight were right, and checked that the plane release was the same each time. \nThe factors that we considered each had two levels as follows: \nPaper: A4 size, 80gms and 50gms\nDesign: High Performance Dual Glider, and Incredibly Simple Glider (patterns attached to original report)\nAngle of release: Horizontal, or 45 degrees upward. \nThe random order assignment was calculated using the random number function of a calculator. Each combination of factors was assigned a number from one to eight, the random numbers were generated and accordingly the order of the experiment was found. 
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nDistance\n\nDistance travelled in mm\n\nPaper\n\n80gms = 1, 50gms = 2\n\nAngle\n\nHorizontal = 1, 45 degrees = 2\n\nDesign\n\nHigh-performance = 1, Incredibly simple = 2\n\nOrder\n\nOrder in which the runs were conducted\n", + "download": "http://www.statsci.org/data/oz/planes.txt", + "filename": "planes", + "name": "Paper Plane Experiment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Here is a table from the National Vital Statistics Report that gives the Life Expectancy for white males in the United States every decade during the 20th century (1 = 1900 to 1910, 2 = 1911 to 1920, etc.). Does a linear model relating life expectancy to decade fit? Would re-expressing either variable help?", - "download": "https://dasl.datadescription.com/download/data/3313", - "filename": "Life-expectancy-US", - "name": "Life expectancy US", + "description": "\"Discovery Day\" is a day set aside by the United States Naval Postgraduate School in Monterey, California, to invite the general public into its laboratories. On Discovery Day, 21 October 1995, data on reaction time and hand-eye coordination were collected on 118 members of the public who visited the Human Systems Integration Laboratory. The age and sex of each subject were also recorded. Visitors were mostly in family groups. \nOne experiment which demonstrates motor learning and hand-eye coordination, is rotary pursuit tracking. The equipment used has a rotating disk with a 3/4\" target spot. The subject’s task is to maintain contact with the target spot with a metal wand. Trials were conducted for 15 seconds at a time, and the total contact time during the 15 seconds was recorded. Four trials were recorded for each of 108 subjects. 
\nThe target spot on the Circle tracker keeps constant speed in a circular path. The target spot on the Box tracker has varying speeds as it traverses the box, making the task potentially more difficult. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSex\n\nMale (M) or female (F)\n\nAge\n\nAge of subject in years\n\nShape\n\nBox or Circle\n\nTrial1\n\nContact time for 1st trial\n\nTrial2\n\nContact time for 2nd trial\n\nTrial3\n\nContact time for 3rd trial\n\nTrial4\n\nContact time for 4th trial\n\n\n\n", + "download": "http://www.statsci.org/data/general/tracking.txt", + "filename": "tracking", + "name": "Rotary Pursuit Tracking", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Age at first marriage has changed over the course of the past century. In addition, the difference in the age of the husband and of the wife at first marriage has changed. Both the ages and the difference in ages can be interesting to analyze. ", - "download": "https://dasl.datadescription.com/download/data/3329", - "filename": "Marriage-age-2015", - "name": "Marriage age 2015", + "description": "Experiment conducted by Bill Afantenou, second year statistics student at QUT. Here is his description of the experiment: \n``As I am a big pizza lover, I had much pleasure in involving pizza in my experiment. I became curious to find out the time it took for a pizza to be delivered to the front door of my house. I was interested to see how, by varying whether I ordered thick or thin crust, whether Coke was ordered with the pizza and whether garlic bread was ordered with the pizza, the response would be affected. \n``Because of my current financial status and limitation of time, I decided to have only two replicates, just to get a reasonable estimate of the variance. 
To decrease my financial burden I managed a deal with the manager of the pizza shop. I managed to get the pickup special, delivered to my house, which was the cheapest and smallest pizza made. I tried to repeat the experiment in as nearly as possible identical conditions to reduce `noise'. \n``I ordered the pizza from the same shop, being Domino's Pizza. To be consistent I ordered a Supreme pizza each time at approximately the same time of day. The response was measured from the time I closed the telephone to the time the pizza was delivered to the front door of my house. \n``I wrote each of the eight treatments on a piece of paper twice, put them all into a hat, mixed them up, and took them out one at a time to allocate the order in which each treatment was done. \n``As well as the response and treatment for each pizza delivery the actual hour of delivery was recorded, also the order in which the treatments were done and whether the driver was male or female.'' \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nCrust\n\nThin=0, Thick=1\n\nCoke\n\nNo=0, Yes=1\n\nBread\n\nGarlic bread. No=0, Yes=1\n\nDriver\n\nMale=M, Female=F\n\nHour\n\nTime of order in hours since midnight\n\nDelivery\n\nDelivery time in minutes\n", + "download": "http://www.statsci.org/data/oz/pizza.txt", + "filename": "pizza", + "name": "Pizza Delivery Experiment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Age at first marriage has changed over the course of the past century. In addition, the difference in the age of the husband and of the wife at first marriage has changed. Both the ages and the difference in ages can be interesting to analyze.
", - "download": "https://dasl.datadescription.com/download/data/3330", - "filename": "Marriage-age-2016", - "name": "Marriage age 2016", + "description": "An experiment is conducted to compare the energy requirements of three physical activities: running, walking and bicycle riding. Eight subjects are asked to run, walk and bicycle a measured distance, and the number of kilocalories expended per kilometre is determined for each subject during each activity. The activities are run in random order with time for recovery between activities. Each activity was monitored exactly once for each individual. ", + "download": "http://www.statsci.org/data/general/energy.txt", + "filename": "energy", + "name": "Energy Requirements Running, Walking and Cycling", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The estimated median age at fist marriage by sex from 1890 to 2017 is provided by the U.S. Census bureau. Since 1960, marriage ages have been increasing steadily. Has the difference between men’s and women’s first marriage age changed? ", - "download": "https://dasl.datadescription.com/download/data/3331", - "filename": "Marriage-age-2017", - "name": "Marriage age 2017", + "description": "The data set comprises the results of a saturated 2^(15-11) fractional factorial with 4 observations per run. There were 15 controllable factors. The responses are the proportional shrinkage of four samples taken from 3000-foot lengths of speedometer cable manufactured at each set of conditions. The objective was to reduce the post-extrusion shrinkage of the speedometer casing.
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nA\n\nliner OD\n\nB\n\nliner die\n\nC\n\nliner material\n\nD\n\nliner line speed\n\nE\n\nwire braid type\n\nF\n\nbraiding tension\n\nG\n\nwire diameter\n\nH\n\nliner tension\n\nI\n\nliner temperature\n\nJ\n\ncoating material\n\nK\n\ncoating die type\n\nL\n\nmelt temperature\n\nM\n\nscreen pack\n\nN\n\ncooling method\n\nO\n\nline speed\n\ny1\n\nshrinkage value of first sample\n\ny2\n\nshrinkage value of second sample\n\ny3\n\nshrinkage value of third sample\n\ny4\n\nshrinkage value of fourth sample\n", + "download": "http://www.statsci.org/data/general/speedome.txt", + "filename": "speedome", + "name": "Speedometer-Cable Shrinkage", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Source: JAMA 284 [2000]:335–341) \nNumber of Cases: 278", - "download": "https://dasl.datadescription.com/download/data/3506", - "filename": "Twin-Births", - "name": "Twin Births", + "description": "The data are from a Procter and Gamble study reported by Smith and Dubey (1964) on the amount of available chlorine in a product as a function of time since manufacture.
Theoretical considerations lead to the model \nChlorine = a + (0.49 - a) exp{ -b (Weeks - 8) } \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nWeeks\n\nTime in weeks since manufacture\n\nChlorine\n\nAvailable chlorine\n", + "download": "http://www.statsci.org/data/general/chlorine.txt", + "filename": "chlorine", + "name": "Available Chlorine", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "In January 2012, the New York Times\npublished a story called “Twin Births in the U.S., Like Never\nBefore,” in which they reported a 76 percent increase in the\nrate of twin births from 1980 to 2009. The dataset gives the number\nof twin births each year (per 1000 live births). Can you confirm the Times report?\nThe dataset also includes the atmospheric CO2 levels (ppm) for those years to offer an alternative predictor in case there appears to be an argument for causation.", - "download": "https://dasl.datadescription.com/download/data/3505", - "filename": "Twins-by-Year", - "name": "Twins by Year 2014", + "description": "The data give the normalized magnitudes of the voice data when the vowel 'ooh' was sung at a pitch of 290 Hz. A Kurzweil K2500 Sampler/Synthesizer was used to capture and to store the data. \nThe frequencies found in the signal can be used to identify the phonetical vowel, and are of interest in voice synthesis, therapy and training. Further details are given in Oliver (1997). 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nMagnitude\n\nNormalized Magnitudes at equi-spaced time intervals\n\n\n\n", + "download": "http://www.statsci.org/data/general/ooh.txt", + "filename": "ooh", + "name": "Voice Data from Singing the Vowel 'ooh'", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Working parents", - "download": "https://dasl.datadescription.com/download/data/3539", - "filename": "Working-parents", - "name": "Working parents", + "description": "In studies aimed at characterising an author's style, samples of n words are taken and the number of function words in each sample counted. Often binomial or Poisson distributions are assumed to hold for the proportions of function words. The table shows the combined frequencies (x) of the articles \"the\", \"a\" and \"an\" in samples from Macaulay's \"Essay on Milton\", taken from the Oxford edition of Macaulay's (1923) literary essays. Non-overlapping samples were drawn from opening words of two randomly chosen lines from each of 50 pages of printed text, 10 word samples being simply extensions of 5 word samples.
The data show clear evidence of underdispersion.", + "download": "http://www.statsci.org/data/oz/wdcount.txt", + "filename": "wdcount", + "name": "Underdispersed Word Counts", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], - "subcategory_name": "Demographics" + "subcategory_name": "Other" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A statistics professor at a large university polled his students to find out what their majors were and what position they held in the family birth order. The results are summarized in the table.", - "download": "https://dasl.datadescription.com/download/data/3076", - "filename": "Birth-order", - "name": "Birth order", + "description": "The data give the number of deaths in prison custody in Australia in each of the six years 1990 to 1995, given separately for Aboriginal and Torres Strait Islanders (indigenous) and others (non-indigenous). \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear\n\n1990 through 1995\n\nIndigenous\n\nYes = Aboriginal or Torres Strait Islander, No = Non-indigenous\n\nPrisoners\n\nTotal number in prison custody\n\nDeaths\n\nNumber of deaths in prison custody\n\nPopulation\n\nAdult population (15+ years)\n\n\n\n\nThe data were collected in response to the Royal Commission into Aboriginal Deaths in Custody, the final report of which was tabled in the Federal Parliament on the 9 May 1991. \nThe report of the Royal Commission has two streams. One is concerned with the ninety-nine Aboriginal and Torres Strait Islander deaths in custody which occurred throughout Australia during the period 1 January 1980 to 31 May 1989. Issues around the causes of death, culpability of custodians and their employers, and the prevention of future deaths were addressed in depth. 
The second stream concerned what the Royal Commission called the ‘underlying issues’: the social, cultural, and legal factors which, in the view of the Commissioners, had some bearing on the deaths. These underlying issues, as revealed from the chapter headings of the Royal Commission’s National Report, included the Legacy of History, Aboriginal Society Today, Relations With the Non-Aboriginal Community, The Harmful Use of Alcohol and Other Drugs, Schooling, Employment, Unemployment and Poverty, Housing and Infrastructure, Land Needs, and Self-determination. \nThe link between the Royal Commission’s discussion of the individual deaths investigated, the prevention of future deaths and the underlying issues, is its position on the over-representation of Indigenous people in custody in Australia. A central conclusion of the Royal Commission, illustrating this point, was as follows: \nThe work of the commission has established that Aboriginal people in custody do not die at a greater rate than non-Aboriginal people in custody. \nHowever, what is overwhelmingly different is the rate at which Aboriginal people come into custody, compared with the rate of the general community ... The ninety-nine who died in custody illustrate that over-representation and, in a sense, are the victims of it. \nThe conclusions are clear. Aboriginal people die in custody at a rate relative to their proportion of the whole population which is totally unacceptable and which would not be tolerated if it occurred in the non-Aboriginal community. But this occurs not because Aboriginal people in custody are more likely to die than others in custody, but because the Aboriginal population is grossly over-represented in custody.
Too many Aboriginal people are in custody too often (Johnston, 1991, Vol 1, p6).", + "download": "http://www.statsci.org/data/oz/custody.txt", + "filename": "custody", + "name": "Aboriginal Deaths in Custody", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The technology committee at a school has stated that the average time spent by students per lab visit has increased and the increase supports their argument that they need to increase lab fees.\nTo substantiate this claim, the committee randomly sampled 12 student lab visits and noted the amount of time spent using the computer. The times in minutes are given:", - "download": "https://dasl.datadescription.com/download/data/3127", - "filename": "Computer-lab", - "name": "Computer lab fees", + "description": "Facts on the countries of Asia. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nCountry \n\nName\n\nArea \n\nTotal area (sq km)\n\nPopulation \n\nPopulation July 1995 est.\n\nLife \n\nLife Expectancy 1995 est. (years)\n\nGDP \n\nGDP 1994 (US$ billions)\n\nGDP/caput \n\nGDP per person 1994 est (US$)\n\n\n\n", + "download": "http://www.statsci.org/data/oz/asia.txt", + "filename": "asia", + "name": "Countries of Asia", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Administration" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Students in two basic Spanish classes were required to learn 50 new vocabulary words. One group of 45 students received the list on Monday and studied the words all week. Statistics summarizing this group’s scores on Friday’s quiz are given. 
The other group of 25 students did not get the vocabulary list until Thursday. They also took the quiz on Friday, after “cramming” Thursday night. Then, when they returned to class the following Monday, they were retested—without advance warning. Both sets of test scores for these students are given.", - "download": "https://dasl.datadescription.com/download/data/3140", - "filename": "Cramming", - "name": "Cramming", + "description": "The data give the survival status of passengers on the Titanic, together with their names, age, sex and passenger class. \nAbout half of the ages for the 3rd Class passengers are missing, although a good many of these could be filled in from the original source below. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nName\n\nRecorded name of passenger\n\nPClass\n\nPassenger class: 1st, 2nd or 3rd\n\nAge\n\nAge in years\n\nSex\n\nmale or female\n\nSurvived\n\n1 = Yes, 0 = No\n\n\n\n", + "download": "http://www.statsci.org/data/general/titanic.txt", + "filename": "titanic_", + "name": "Passengers on the Titanic", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3184", - "filename": "Education-by-age", - "name": "Education by age", + "description": "For each of ten streets with bike lanes, investigators measured the distance between the centre line and a cyclist in the bike lane. They used photography to determine the distance between the cyclist and a passing car on those same ten streets, recording all distances in feet.
\n", + "download": "http://www.statsci.org/data/general/cyclist.txt", + "filename": "cyclist", + "name": "Distance of Cars from Cyclists", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Is college worth the expense? Which colleges have graduates who earn the most? And what is the best predictor of earnings 5-years out? The data provide several possible predictors and background information suitable for building regression models. ", - "download": "https://dasl.datadescription.com/download/data/3249", - "filename": "Graduate-Earnings", - "name": "Graduate Earnings", + "description": "Hourly carbon monoxide (CO) averages were recorded on summer weekdays at a measurement station in Los Angeles. The station was established by the Environmental Protection Agency as part of a larger study to assess the effectiveness of the catalytic converter. It was located about 25 feet from the San Diego Freeway, which in this particular area is located at 145 degrees north. It was located such that winds from 145 to 325 degrees (which in the summer are the prevalent wind directions during the daylight hours) transport the CO emissions from the highway toward the measurement station. Aggregate measurements were recorded for each hour of the day 1 to 24. \nHour \n- \nhour of the day, from midnight to midnight \nCO \n- \naverage summer weekday CO concentration (parts per million) \nTD \n- \naverage weekday traffic density (traffic count/traffic speed) \nWS \n- \naverage perpendicular wind-speed component,\nwind speed x cos(wind direction - 235 degrees) \n\nIt would be interesting to have wind speed and direction recorded separately.
", + "download": "http://www.statsci.org/data/general/cofreewy.txt", + "filename": "cofreewy", + "name": "Carbon Monoxide from a Freeway", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The National Center for Education Statistic reports average mathematics achievement scores for eighth graders in all 50 states.", - "download": "https://dasl.datadescription.com/download/data/3332", - "filename": "Math-scores-2013", - "name": "Math scores 2013", + "description": "This is a highly fractionated two-level factorial design employed as a screening design in an off-line welding experiment performed by the National Railway Corporation of Japan. There were 16 runs and 9 experimental factors. The response variable is the observed tensile strength of the weld, one of several quality characteristics measured. All other variables are at plus and minus levels. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nRods\n\nKind of welding rods\n\nDrying\n\nPeriod of drying\n\nMaterial\n\nWelded material\n\nThickness\n\nThickness\n\nAngle\n\nAngle\n\nOpening\n\nOpening\n\nCurrent\n\nCurrent\n\nMethod\n\nWelding method\n\nPreheating\n\nPreheating\n\nStrength\n\nTensile strength of the weld in kg/mm\n", + "download": "http://www.statsci.org/data/general/welding.txt", + "filename": "welding", + "name": "Tensile Strength of Welds", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Scores on SAT tests for 162 students at the same school. (The identity of the school is not provided for privacy.) How are Math and Verbal scores related? 
Would a regression model be appropriate? Is there a difference in male and female scores? How would that difference be modeled? ", - "download": "https://dasl.datadescription.com/download/data/3438", - "filename": "SAT-scores", - "name": "SAT scores", + "description": "Military pilots sometimes black out when their brains are deprived of oxygen due to G-forces during violent maneuvers. Glaister and Miller (1990) produced similar symptoms by exposing volunteers’ lower bodies to negative air pressure, likewise decreasing oxygen to the brain. The data lists the subjects' ages and whether they showed syncopal blackout related signs (pallor, sweating, slow heartbeat, unconsciousness) during an 18 minute period. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nSubject\n\nInitials of the subject's name\n\nAge\n\nSubject's age in years\n\nSigns\n\nWhether subject showed blackout-related signs (0=No, 1=Yes)\n", + "download": "http://www.statsci.org/data/general/gforces.txt", + "filename": "gforces", + "name": "G-Induced Loss of Consciousness", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Travel" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "A school district superintendent wants to test a new method of teaching arithmetic in the fourth grade at his 15 schools. He plans to select 8 students from each school to take part in the experiment, but to make sure they are roughly of the same ability, he first gives a test to all 120 students. 
The data hold the scores of the test by school.", - "download": "https://dasl.datadescription.com/download/data/3439", - "filename": "School-system", - "name": "School system", + "description": "The data give the number of deaths caused by firearms in Australia from 1983 to 1997, expressed as a rate per 100,000 of population. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\nYear\n\nRate \n\nNumber of deaths caused by firearms per 100,000 population\n", + "download": "http://www.statsci.org/data/oz/firearms.txt", + "filename": "firearms", + "name": "Deaths Caused by Firearms", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The dataset contains data from a class survey ", - "download": "https://dasl.datadescription.com/download/data/3465", - "filename": "Student-survey", - "name": "Student survey", + "description": "Criminologists are interested in the effect of punishment regimes on crime rates. This has been studied using aggregate data on 47 states of the USA for 1960.
The data set contains the following columns: \n \nVariable\n \nDescription\n\nM\n\npercentage of males aged 14–24 in total state population\n\nSo\n\nindicator variable for a southern state \n\nEd\n\nmean years of schooling of the population aged 25 years or over\n\nPo1\n\nper capita expenditure on police protection in 1960 \n\nPo2\n\nper capita expenditure on police protection in 1959 \n\nLF\n\nlabour force participation rate of civilian urban males in the age-group 14-24\n\nM.F\n\nnumber of males per 100 females \n\nPop\n\nstate population in 1960 in hundred thousands\n\nNW\n\npercentage of nonwhites in the population \n\nU1\n\nunemployment rate of urban males 14–24 \n\nU2\n\nunemployment rate of urban males 35–39 \n\nWealth\n\nwealth: median value of transferable assets or family income\n\nIneq\n\nincome inequality: percentage of families earning below half the median income\n\nProb\n\nprobability of imprisonment: ratio of number of commitments to number of offenses\n\nTime\n\naverage time in months served by offenders in state prisons before their first release\n\nCrime\n\ncrime rate: number of offenses per 100,000 population in 1960\n", + "download": "http://www.statsci.org/data/general/uscrime.txt", + "filename": "uscrime", + "name": "Effect of Punishment Regimes on Crime Rates", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Researchers randomly assigned subjects to take one of two tests (form A or form B) either electronically or with pencil and paper. Subjects then took the other test using the other method. The two forms had been designed to be equivalent in difficulty, but nevertheless, that equivalence was checked as part of the experiment. 
Our concern is whether subjects did equally well with each testing method.", - "download": "https://dasl.datadescription.com/download/data/3466", - "filename": "Student-testing", - "name": "Student testing", + "description": "A number of homicide incidents in Australia have involved multiple killings. A multiple killing is defined as any incident where two or more persons are murdered. According to available literature, there have been 24 multiple killings by firearm between 1987 and 1996. These resulted in 128 deaths. The data give the number of multiple killings which have been recorded for the period 1987 to 28 April 1996. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nYear \n\n1987 - 1996.\n\nIncidents \n\nNumber of multiple killings\n\nDeaths \n\nTotal number of deaths\n\n\n\n\nThe data for the year 1996 include killings only up to and including 28 April.", + "download": "http://www.statsci.org/data/oz/multkill.txt", + "filename": "multkill", + "name": "Multiple Killings Committed with a Firearm", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Crime" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Summer school", - "download": "https://dasl.datadescription.com/download/data/3469", - "filename": "Summer-school", - "name": "Summer school", + "description": "An individual's critical flicker frequency is the highest frequency at which the flicker in a flickering light source can be detected. At frequencies above the critical frequency, the light source appears to be continuous even though it is actually flickering. This investigation recorded critical flicker frequency and iris colour of the eye for 19 subjects. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nColour\n\nEye colour: Brown, Green or Blue\n\nFlicker\n\nCritical flicker frequency in cycles/sec\n", + "download": "http://www.statsci.org/data/general/flicker.txt", + "filename": "flicker", + "name": "Eye Colour and Flicker Frequency", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, + } + ], + "subcategory_name": "Population" + }, + { + "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Tuition 2016", - "download": "https://dasl.datadescription.com/download/data/3502", - "filename": "Tuition-2016", - "name": "Tuition 2016", + "description": "The data were collected as part of a time study for Telecom, now known as Telstra. The purpose of the study was to model the total hours worked in a section of Telecom in terms of the counts of various tasks. It was hoped that such a model could be used to predict hours worked and hence staffing requirements in changing circumstances. The number of hours worked by employees in a fault reporting centre were recorded, together with the number of faults of each type which were recorded. \nEmployees often work on a flexitime system which allows them to build up time and to leave early every second Friday.
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nHours\n\nNumber of hours worked\n\nByDa\n\nNumber of talks of a certain type\n\nPR\n\n\n\nRWT\n\nA type of fault variable \n\nFault\n\n\n\nSOA\n\nNumber of service orders of type A \n\nSOB\n\nNumber of service orders of type B \n\nSOC\n\nNumber of service orders of type C \n\nCable\n\n\n\nField\n\nField call \n\nHot\n\nHotline \n\nREST\n\n\n\nSpec\n\n\n\nApp\n\n\n\nProb\n\n\n\nSC\n\n\n\nHO\n\n\n\nMO\n\n\n\nDay\n\nDay of the week: 1-Monday, 2-Tuesday, 3-Wednesday, 4-Thursday, 5-Friday \n", + "download": "http://www.statsci.org/data/oz/telecom.txt", + "filename": "telecom", + "name": "Telecom Work Measurement Study", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "https://collegescorecard.ed.gov/data/", - "download": "https://dasl.datadescription.com/download/data/3503", - "filename": "Tuition-All-Schools", - "name": "Tuition All Schools 2016", + "description": "An industrial Taguchi experiment was performed to study the influence of several controllable factors on the mean value and the variation in the percentage of shrinkage of products made by injection moulding. For studying the variation, three noise factors were also included in the design. All factors were set at two levels. \nThe problem is a `nominal-is-best' problem where the aim is to reach a certain target for the percentage shrinkage, at the same time having as small a variation as possible about the target value. The design that was applied is a so-called Taguchi L8(2^7)-design with seven controllable factors. At each setting of the controllable factors, the noise factors were varied according to a Taguchi L4(2^3)-design.
\n\n\n\n\nVariable\n\nDescription\n\n\n\n\nControllable Factors:\n\nCycle\n\nCycle time\n\nMould\n\nMould temperature\n\nCavity\n\nCavity thickness\n\nPressure\n\nHolding pressure\n\nSpeed\n\nInjection speed\n\nTime\n\nHolding time\n\nGate\n\nGate size\n\nNoise Factors:\n\nRegrind\n\nPercentage regrind\n\nMoisture\n\nMoisture content\n\nTemperature\n\nAmbient temperature\n\nResponse:\n\nShrinkage\n\nPercentage shrinkage\n", + "download": "http://www.statsci.org/data/general/injmould.txt", + "filename": "injmould", + "name": "Injection Moulding Shrinkage", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "The data give the mean ACT composite scores for all 450 Wisconsin public schools in 2015 along with the type of school and number of students.", - "download": "https://dasl.datadescription.com/download/data/3533", - "filename": "Wisconsin-ACT-2015", - "name": "Wisconsin ACT 2015", + "description": "The data give the ambient temperature and the number of primary O-rings damaged for 23 of the 24 space shuttle launches before the launch of the space shuttle Challenger on January 28, 1986. (Challenger was the 25th shuttle. One engine was lost at sea and could not be examined.) Each space shuttle contains 6 primary O-rings. \nThe forecast temperature of the launching day of the Challenger was 31 degrees F.
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTemp\n\nAmbient temperature\n\nDamaged\n\nNumber of O-rings damaged\n", + "download": "http://www.statsci.org/data/general/challenger.txt", + "filename": "challenger", + "name": "Space Shuttle Challenger", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, - "description": "Wisconsin ACT math", - "download": "https://dasl.datadescription.com/download/data/3534", - "filename": "Wisconsin-ACT-math", - "name": "Wisconsin ACT math", + "description": "The data consist of failures of a piece of electronic equipment operating in two modes. For each operating period, Mode1 is the time spent operating in one mode and Mode2 is the time spent operating in the other. The total number of failures recorded in each period is recorded. \n \n\n\n \nVariable \n \nDescription\n \n\n\n \nMode1 \n \nTime in operating mode 1\n \nMode2 \n \nTime in operating mode 2\n \nFailures\n \nNumber of failures\n \n\n\n", + "download": "http://www.statsci.org/data/general/twomodes.txt", + "filename": "twomodes", + "name": "Failures of Electronic Equipment", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "This dataset contains variables that address the relationship between \npublic school expenditures and academic performance, as measured by the \nSAT. 
\n\nVARIABLE DESCRIPTIONS: \nColumns\n 1 - 16 Name of state (in quotation marks)\n18 - 22 Current expenditure per pupil in average daily attendance \n in public elementary and secondary schools, 1994-95 \n (in thousands of dollars)\n24 - 27 Average pupil/teacher ratio in public elementary and \n secondary schools, Fall 1994\n29 - 34 Estimated average annual salary of teachers in public \n elementary and secondary schools, 1994-95 (in thousands of \n dollars)\n36 - 37 Percentage of all eligible students taking the SAT, 1994-95\n39 - 41 Average verbal SAT score, 1994-95\n43 - 45 Average math SAT score, 1994-95\n47 - 50 Average total score on the SAT, 1994-95 ", - "download": "http://jse.amstat.org/datasets/sat.dat.txt", - "filename": "sat", - "name": "Getting What You Pay For: The Debate Over Equity in Public School Expenditures ", - "number_format": 31, - "remove_quotes": true, - "separator": "SPACE", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Data are from the 1995 U.S. News report on American colleges and\nuniversities. They include demographic information on tuition,\nroom & board costs, SAT or ACT scores, application/acceptance\nrates, student/faculty ratio, graduation rate, and more. The\ndataset is used for the 1995 Data Analysis Exposition, sponsored\nby the Statistical Graphics Section of the American Statistical\nAssociation. See the file colleges.txt for more information on \nthe Exposition.", - "download": "http://jse.amstat.org/datasets/usnews.dat.txt", - "filename": "usnews", - "name": "U.S. 
News College data", - "number_format": 31, - "remove_quotes": true, - "separator": ",", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": false } ], - "subcategory_name": "Education" + "subcategory_name": "Technology" } ] }, { "category_name": "Physics", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "Why does the moon appear to be so much larger when it is near the horizon than when it is directly overhead? This question has produced a wide variety of theories from psychologists. An important early hypothesis was put forth by Holway and Boring (1940) who suggested that the illusion was due to the fact that when the moon was on the horizon, the observer looked straight at it with eyes level, whereas when it was at its zenith, the observer had to elevate his or her eyes as well as his or her head to see it. To test this hypothesis, Kaufman and Rock (1962) devised an apparatus that allowed them to present two artificial moons, one at the horizon and one at the zenith, and to control whether the subjects elevated their eyes or kept them level to see the zenith moon. The horizon, or comparison, moon was always viewed with eyes level. Subjects were asked to adjust the variable horizon moon to match the size of the zenith moon or vice versa. For each subject the ratio of the perceived size of the horizon moon to the perceived size of the zenith moon was recorded with eyes elevated and with eyes level. A ratio of 1.00 would represent no illusion. 
If Holway and Boring were correct, there should be a greater illusion in the eyes-elevated condition than in the eyes-level condition.", "download": "http://www.statsci.org/data/general/moon.txt", "filename": "moon", "name": "The Moon Illusion", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the magnitudes of a variable star at midnight on 600 consecutive nights. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nStar\n\nMagnitude on each night\n", "download": "http://www.statsci.org/data/general/star.txt", "filename": "star", "name": "Magnitudes of a Variable Star", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives a sequence of observations on the magnitude of a variable Cepheid star made from the Mount Stromlo Observatory near Canberra in Australia. The observations were made as part of the MACHO project. \nThe MACHO project monitors millions of stars every night with a dedicated telescope at Mount Stromlo Observatory. The collaboration is probing the halo of our galaxy in order to detect dark matter in the form of Massive Compact Halo Objects -- MACHOs. These are astronomical bodies that emit negligible visible light, such as dwarf or neutron stars, large planets, and black holes. Detection of a MACHO is achieved by observing its gravitational lensing effect on a chance background star as the MACHO crosses near the line of sight between the observer and this star. 
In order to detect a sufficiently large number of MACHOs, the project collects observations on an large number of distant stars over an extended period of time. Data are being collected daily over a 4-year period (weather permitting), on approximately 8 million stars in the Large Magellanic Cloud (LMC) and the bulge of the Milky Way. \nThis database is a valuable resource for many other types of astronomical research. It is the most comprehensive catalog of stars in the LMC and contains stars much dimmer than those covered by previous surveys. Temporal coverage is unusually long compared to most star surveys, which permits a comprehensive study of star variability, including long periods and transient phenomena. About 40,000 variable stars have been observed in the LMC and a similar number in the galactic bulge. \nVariable stars are stars for which the intensity of the emitted energy changes over time; for periodic variable stars the change of intensity is periodic over time. Common types of periodic variable stars include eclipsing binaries, RR Lyraes, and Cepheids. Cepheids are very bright stars with periods of 1-70 days. The light curve has an asymmetric shape, and rises more rapidly than it falls. Cepheids with periods of about 1 week tend to have a bump in the descending part of the curve. For periods of about 10 days, the bump is at the peak of the curve, and for longer periods it is on the rising part of the curve. The brightness changes are caused by periodic pulsation (contraction and expansion) of the stars and their outer layers. \nThere are numerous additional types of variable stars, and each of the categories above contains subcategories. For example, Beat Cepheids and Beat RR Lyrae oscillate at more than one frequency. Different classes of variable stars can be located in different regions of a plot of magnitude versus temperature or spectral type. 
For example, RR Lyrae and Cepheids lie on a strip called the \"instability strip.\" Different types of variable curves are classified also on the basis of the shapes of their light curves and the relationships of shapes to period, for example. As well as being important for studies of stellar structure and evolution, these classes are used to determine distances on a cosmic scale by means of the relationship of their periods to their magnitudes. \nObservations of these stars are typically made at rather irregular times, depending on observation schedules and sky conditions. Different observations have differing errors. All this makes determination of the periods and the shapes of the corresponding phased light curves an interesting statistical problem. \nThis particular star is a category 1 Cepheid (magnitude -9.166) at coordinates (1541.5,1395.1). Magnitudes were recorded in the blue band from 4500 to 6300 Angstroms. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nMagnitude\n\nDifferential Magnitude\n\nSD\n\nStandard deviation of the magnitude observation. A value of -99 indicates missing and negative value indicate unreliable observations.\n", "download": "http://www.statsci.org/data/oz/ceph1.txt", "filename": "ceph1", "name": "Magnitudes of Variable Star Cepheid 1", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives observations on the magnitude of a Cepheid variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. 
This particular star is Cepheid star number 2. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Anstroms\n\nRedSD\n\nStandard deviation of the red-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\n\n\n", "download": "http://www.statsci.org/data/oz/ceph2.txt", "filename": "ceph2", "name": "Magnitudes of Variable Star Cepheid 2", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives a sequence of observations on the magnitude of an eclipsing binary variable star made from the Mount Stromlo Observatory near Canberra in Australia. The observations were made as part of the MACHO project. \nThe MACHO project monitors millions of stars every night with a dedicated telescope at Mount Stromlo Observatory. The collaboration is probing the halo of our galaxy in order to detect dark matter in the form of Massive Compact Halo Objects -- MACHOs. These are astronomical bodies that emit negligible visible light, such as dwarf or neutron stars, large planets, and black holes. Detection of a MACHO is achieved by observing its gravitational lensing effect on a chance background star as the MACHO crosses near the line of sight between the observer and this star. In order to detect a sufficiently large number of MACHOs, the project collects observations on an large number of distant stars over an extended period of time. 
Data are being collected daily over a 4-year period (weather permitting), on approximately 8 million stars in the Large Magellanic Cloud (LMC) and the bulge of the Milky Way. \nThis database is a valuable resource for many other types of astronomical research. It is the most comprehensive catalog of stars in the LMC and contains stars much dimmer than those covered by previous surveys. Temporal coverage is unusually long compared to most star surveys, which permits a comprehensive study of star variability, including long periods and transient phenomena. About 40,000 variable stars have been observed in the LMC and a similar number in the galactic bulge. \nVariable stars are stars for which the intensity of the emitted energy changes over time; for periodic variable stars the change of intensity is periodic over time. Common types of periodic variable stars include eclipsing binaries, RR Lyraes, and Cepheids. Eclipsing binaries consist of two stars orbiting each other in a conformation relative to the observer such that brightness variability occurs as one star passes in front of the other in turn; as the stars may be of different brightness, the drop in light flux depends on which star is in the front. These stars have periods of between 3 hours and 24 years, although 0.5 to 10 days is the most common range. \nThere are numerous additional types of variable stars, and each of the categories above contains subcategories. For example, Beat Cepheids and Beat RR Lyrae oscillate at more than one frequency. Different classes of variable stars can be located in different regions of a plot of magnitude versus temperature or spectral type. For example, RR Lyrae and Cepheids lie on a strip called the \"instability strip.\" Different types of variable curves are classified also on the basis of the shapes of their light curves and the relationships of shapes to period, for example. 
As well as being important for studies of stellar structure and evolution, these classes are used to determine distances on a cosmic scale by means of the relationship of their periods to their magnitudes. \nObservations of these stars are typically made at rather irregular times, depending on observation schedules and sky conditions. Different observations have differing errors. All this makes determination of the periods and the shapes of the corresponding phased light curves an interesting statistical problem. \nThis particular star is a category 1 eclipsing binary (magnitude -10.26) at coordinates (1617.8, 669.35). Magnitudes were recorded in the blue band from 4500 to 6300 Angstroms. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nMagnitude\n\nDifferential Magnitude\n\nSD\n\nStandard deviation of the magnitude observation. A value of -99 indicates missing and negative value indicate unreliable observations.\n", "download": "http://www.statsci.org/data/oz/ecbi1041.txt", "filename": "ecbi1041", "name": "Magnitudes of an Eclipsing Binary Variable Star", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRab Lyrae star number 1061, a category 1 star with an asymmetric signal. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Anstroms\n\nRedSD\n\nStandard deviation of the red-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n", "download": "http://www.statsci.org/data/oz/rrl1061.txt", "filename": "rrl1061", "name": "Magnitudes of Variable Star RR Lyrae 1061", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRc Lyrae star number 1198, a category 1 star with a symmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Anstroms\n\nRedSD\n\nStandard deviation of the red-band observation. 
A value of -99 indicates missing and negative values indicate unreliable observations.\n", "download": "http://www.statsci.org/data/oz/rrl1198.txt", "filename": "rrl1198", "name": "Magnitudes of Variable Star RR Lyrae 1198", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data gives observations on the magnitude of a RR Lyrae variable star made from the Mount Stromlo Observatory near Canberra in Australia. Magnitudes were recorded separately for the blue and red bands. Observation times were irregularly spaced depending on sky conditions and the observation schedule. The observations were made as part of the MACHO project. This particular star is RRc Lyrae star number 1263, a category 1 star with a symmetric signal. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nTime\n\nTime of observation in days\n\nBlue\n\nDifferential magnitude in the blue band from 4500 to 6300 Angstroms\n\nBlueSD\n\nStandard deviation of the blue-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\nRed\n\nDifferential magnitude in the red band from 6300 to 7600 Anstroms\n\nRedSD\n\nStandard deviation of the red-band observation. A value of -99 indicates missing and negative values indicate unreliable observations.\n\n\n\n", "download": "http://www.statsci.org/data/oz/rrl1263.txt", "filename": "rrl1263", "name": "Magnitudes of Variable Star RR Lyrae 1263", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "On August 24, 2006, the International Astronomical Union voted that Pluto is not a planet. 
Some members of the public have been reluctant to accept that decision. The data show the average distance of each of the traditional nine planets from the sun. Is there a pattern? Does Pluto fit with the other “official” planets? ", - "download": "https://dasl.datadescription.com/download/data/3397", - "filename": "Planets", - "name": "Planets", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "On August 24, 2006, the International Astronomical Union voted that Pluto is not a planet. Some members of the public have been reluctant to accept that decision. The data show a variety of facts about the 8 planets and Pluto. Exercises consider two models for the planets. Does Pluto behave like a planet? ", - "download": "https://dasl.datadescription.com/download/data/3398", - "filename": "Planets-more", - "name": "Planets more data", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true } ], "subcategory_name": "Astronomy" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "These data are the result of a study involving the analysis of performance degradation data from accelerated tests. The response variable is dialectric breakdown strength in kilo-volts, and the predictor variables are time in weeks and temperature in degrees Celcius. The study can be viewed as an 8 by 4 factorial experiment. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nStrength\n\nDialectric breakdown strength in kilovolts\n\nTime\n\nDuration of testing in weeks (8 levels)\n\nTemperature\n\nTemperature in degrees Celsius (4 levels)\n", "download": "http://www.statsci.org/data/general/dialectr.txt", "filename": "dialectr", "name": "Dialectric Breakdown Strength", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true } ], "subcategory_name": "Electronics" }, { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give time series measurements on waves emanating from a cylinder suspended in a tank of water. The waves are believed to show a high frequency vibration, which is an artifact of the experiment equipment, as well as lower frequency vibration which reflects forces acting on the cylinder. It is of interest to identify and to filter out the high frequency vibration. \n\n\n\n\nVariable\n\nDescription\n\n\n\n\nWaves\n\nRelative vertical displacement at equi-spaced times\n", "download": "http://www.statsci.org/data/general/waves.txt", "filename": "waves_", "name": "Forces on a Cylinder Suspended in Water", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "Scientist Robert Boyle examined the relationship between the volume in which a gas is contained and the pressure in its container. He used a cylindrical container with a moveable top that could be raised or lowered to change the volume. 
He measured the Height in inches by counting equally spaced marks on the cylinder, and", - "download": "https://dasl.datadescription.com/download/data/3083", - "filename": "Boyle", - "name": "Boyle", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "We know from common sense and from Physics that heavier cars need more fuel, but exactly how does a car’s weight affect its fuel efficiency? The data set continues data on 38 cars including their fuel efficiency in miles per gallon measured on a track. ", - "download": "https://dasl.datadescription.com/download/data/3228", - "filename": "Fuel-efficiency", - "name": "Fuel efficiency", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student experimenting with a pendulum counted the number of full swings the pendulum made in 20 seconds for various lengths of string. Her data are given. ", - "download": "https://dasl.datadescription.com/download/data/3390", - "filename": "Pendulum", - "name": "Pendulum", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true } ], "subcategory_name": "Other" } ] }, { "category_name": "Chemistry", "subcategories": [ { "datasets": [ { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "The data give the concentrations at equi-spaced times of an intermediate compound during a chemical experiment involving a catalyst. 
The experiment was conducted in the Department of Chemistry at the Australian National University. The compound is producing exponentially during the first stage of the experiment and then is consumed exponentially during the second stage. Theoretically the process can be described by a compartment model, and the expected concentration of the compound over time is decribed by a second order differential equation. \n\n\n\n\nVariable \n\nDescription\n\n\n\n\nConcentration\n\nConcentration of intermediate compound\n", "download": "http://www.statsci.org/data/oz/sargeson.txt", "filename": "sargeson", "name": "Chemical Experiment with Catalyst", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true }, { "DateTime_format": "yyyy-MM-dd", "comment_character": "#", "create_index_column": false, "description": "This dataset is distributed with S-Plus as the dataframe ethanol. \nThe engine exhaust was analysed in an experiment in which ethanol was burned in a single cylinder automobile test engine. The response variable is NOx, the concentration of nitric oxide (NO) and nitrogen dioxide (NO2) in the engine exhaust, normalized by the work done by the engine. The explanatory variables are the compression ratio of the engine and the equivalence ratio at which the engine was run - a measure of the richness of the air/ethanol mix. 
\n\n\n\n\nVariable \n\nDescription\n\n\n\n\nNOx\n\nConcentration of nitric oxide (NO) and nitrogen dioxide (NO2)\n\nCompression\n\nCompression ratio\n\nEquivalence\n\nEquivalence ratio\n", "download": "http://www.statsci.org/data/general/ethanol.txt", "filename": "ethanol", "name": "Exhaust from Burning Ethanol", "number_format": 31, "remove_quotes": true, "separator": "TAB", "simplify_whitespaces": false, "skip_empty_parts": true, "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "-", - "download": "https://dasl.datadescription.com/download/data/3112", - "filename": "Chromatography", - "name": "Chromatography", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true - }, - { - "DateTime_format": "yyyy-MM-dd", - "comment_character": "#", - "create_index_column": false, - "description": "A student, preparing for a triathlon, suspected that the 45 minutes each day\nshe spent training in a chlorinated pool was damaging her nail polish. She\nwished to investigate whether the color of the nail polish might make a difference.\nShe mounted acrylic nails on sticks and polished them with two different color nail polishes. She soaked them together in a chlorine solution equivalent to a swimming pool’s chlorination and then tapped them 100 times on a computer keyboard to simulate daily stress. 
The response is the % of nail chipped off as measured by scanning images of the nails and using an image processing program.", - "download": "https://dasl.datadescription.com/download/data/3356", - "filename": "Nail-polish", - "name": "Nail polish", - "number_format": 31, - "remove_quotes": true, - "separator": "TAB", - "simplify_whitespaces": false, - "skip_empty_parts": true, - "use_first_row_for_vectorname": true } ], "subcategory_name": "General" } ] } ], - "collection_name": "StatSci_Datasets" + "collection_name": "OzDASL" } diff --git a/data/datasets/StatLib.json b/data/datasets/StatLib.json new file mode 100644 index 000000000..b4519fa84 --- /dev/null +++ b/data/datasets/StatLib.json @@ -0,0 +1,710 @@ +{ + "categories": [ + { + "category_name": "Medicine", + "subcategories": [ + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch14.dat contains the following 19 variables:\n\nPatient ID \nDate on study (MMDDYY)\nTreatment arm (D= daunorubicin, I= idarubicin)\nSex (M= male, F= female)\nAge (years)\nFAB classification (1 - 6)\nKarnofsky score (0 - 100) \nBaseline white blood cells (in thousands per cubic millimeter)\nBaseline platelets (in thousands per cubic millimeter)\nBaseline hemoglobin (g/dl)\nEvaluable (Y= yes, N= no)\nComplete remission (CR) (Y= yes, N= no)\nCourses of chemotherapy to CR\nDate of CR (MMDDYY)\nDate of last follow-up (MMDDYY)\nStatus at last follow-up (D= dead, A= alive)\nBone marrow transplant (Y= yes, N= no)\nDate of bone marrow transplant (MMDDYY)\nInclusion in June 30, 1988 analysis (Y= yes, N= no)", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch14.dat", + "filename": "Leukemia-Trial", + "name": "Interpretation of a Leukemia Trial Stopped Early", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + 
"DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch15.dat contains the following variables:\n\n Patient ID : Integer\n \n Institution : 0 - Memorial Sloan-Kettering,\n 1 - Mayo Clinic,\n 2 - Johns Hopkins.\n Group : 1 - Study,\n 0 - Control.\n\n Means of Detection : 0 - Routine Cytology,\n 1 - Routine X-ray,\n 2 - Both X-ray and Cytology,\n 3 - Interval.\n\n Cell Type : 0 - Epidermoid,\n 1 - Adenocarcinoma,\n 2 - Large Cell,\n 3 - Oat Cell,\n 4 - Other.\n Stage : 4 digits, 1st digit (1,2,3) - overall stage,\n 2nd digit (1,2,3) - tumor,\n 3rd digit (0,1,2) - lymph nodes\n 4th digit (0,1) - distant metastases\n Operated : 1 - yes,\n 0 - no.\n Survival : Integer - Days from detection to last date known alive.\n Survival Category : 0 - Alive,\n 1 - Dead of lung cancer,\n 2 - Dead of other causes.\n\n Missing values - '-'.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch15.dat", + "filename": "Lung-Cancer", + "name": "Early Lung Cancer Detection Studies", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch16a.dat contains extent of scleral extension\n(extent to which the tumor has invaded the sclera or \"white of the eye\")\nas coded by two raters for each of 885 eyes. There is one record for each\neye; the first field of each record contains a patient identifier, the\nsecond field contains the code for scleral extension assigned by rater A,\nand the third field contains the code for scleral extension assigned by\nrater B. 
The coding scheme is:\n\n1=None or innermost layers\n2=Within sclera, but does not extend to scleral surface\n3=Extends to scleral surface\n4=Extrascleral extension without transection\n5=Extrascleral extension with presumed residual tumor in the orbit\n\nThe collaborative Ocular Melanoma Study (COMS) owns the\ncopyright to this dataset; these data are considered preliminary due\nto the ongoing nature of the COMS clinical trials.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch16a.dat", + "filename": "Choroidal-Melanoma", + "name": "Modeling Interrater Agreement for Pathological Features of Choroidal Melanoma", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch16b.dat contains the degree of necrosis (tissue\ndeath) data for 3 raters. The first field contains a patient identifier,\nand the second, third, and fourth fields contain the code for degree of\nnecrosis as assigned by raters A, B, and C, respectively. 
The coding\nscheme is:\n\n1=None\n2=Less than 10% of cells\n3=Greater than or equal to 10% of cells\n\n\nThe collaborative Ocular Melanoma Study (COMS) owns the\ncopyright to this dataset; these data are considered preliminary due\nto the ongoing nature of the COMS clinical trials.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch16b.dat", + "filename": "Choroidal-Melanoma-2", + "name": "Modeling Interrater Agreement for Pathological Features of Choroidal Melanoma", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This data remains the copyright of the Harris Birthright Research Unit\nof the University of Aberdeen, UK. It may be used freely for\nnon-commercial purposes and can be freely distributed provided its\nsource is acknowledged.\n\nThe file ch18a.dat contains the following individual-specific variables:\n\nVariable Coding\nControl/patient code 0=control, 1=patient\nStudy number 1-500 for each group\nNumber of smears 1-15\nBiopsy result 0=negative, 1=positive \n 9=missing (no biopsy)\t\nNumber of days from 0-840 if biopsy done, \nlast smear to biopsy -1 if no biopsy", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch18a.dat", + "filename": "Cervical-Cancer", + "name": "Modeling the Precursors of Cervical Cancer", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This data remains the copyright of the Harris Birthright Research Unit\nof the University of Aberdeen, UK. 
It may be used freely for\nnon-commercial purposes and can be freely distributed provided its\nsource is acknowledged.\n\nThe file ch18b.dat contains the following smear-specific variables:\n\nVariable Coding \nControl/patient code 0=control, 1=patient \nStudy number 1-500 for each group \nSmear number 1-15 \nSmear grade 0=negative, 1=positive \nInterval in days 0-3733, 0 if 1st smear \nsince last smear", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch18b.dat", + "filename": "Cervical-Cancer-2", + "name": "Modeling the Precursors of Cervical Cancer", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Oncology" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch1b.dat is the waste site file, and contains the \nfollowing variables. 
There are NO missing values.\n\nx: Real, x-coordinate of location of an inactive hazardous waste\nsite containing trichloroethylene (TCE).\n\ny: Real, y-coordinate of location of an inactive hazardous waste\nsite containing trichloroethylene (TCE).\n\nsite: Integer, numerical label of waste site.\n Key: Site 1: Monarch Chemicals\n Site 2: IBM Endicott\n Site 3: Singer\n Site 4: Nesco\n Site 5: GE Auburn\n Site 6: Solvent Savers\n Site 7: Smith Corona\n Site 8: Victory Plaza\n Site 9: Hadco\n Site 10: Morse Chain\n Site 11: Groton", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch1b.dat", + "filename": "Disease-Clusters", + "name": "Spatial Pattern Analysis to Detect Rare Disease Clusters", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch17.dat contains the following 15 variables:\n\nVariable Description\n\nOBS Observation number\nCN Center obtaining and reading the scan\nID Scan ID\nBA1 Bone area (sq cm) from centralized Reader 1\nBA2 Bone area (sq cm) from centralized Reader 2\nBA3 Bone area (sq cm) from centralized Reader 3\nBC1 Bone mineral content (gm) from centralized Reader 1\nBC2 Bone mineral content (gm) from centralized Reader 2\nBC3 Bone mineral content (gm) from centralized Reader 3\nBMD1 Bone mineral density (gm/sq cm) from centralized Reader 1\nBMD2 Bone mineral density (gm/sq cm) from centralized Reader 2\nBMD3 Bone mineral density (gm/sq cm) from centralized Reader 3\nBA Bone area (sq cm) from participating center\nBC Bone mineral content (gm) from participating center\nBMD Bone mineral density (gm/sq cm) from participating center\n", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch17.dat", + "filename": "Bone-Mineral", + "name": "Quality Control for Bone Mineral Density Scans", + 
"number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch21a.dat contains the spontaneous activity and rectal\ntemperature data (416 observations of 6 variables). There are no missing values.\n\nVariable List:\n\nOBS:\t\tObservation identification number.\n\nMORPHINE:\tDose of morphine sulfate (mg/kg) injected into study mice. The \n\t\trange is 0 to 8.0.\n\nDEL9_THC:\tDose of Delta9-THC (mg/kg) injected into study mice. The \n\t\trange is from 0 to 15.0.\n\nREP:\t\tIdentification of study replication. The entire 5x7 factorial \n\t\tdesign was replicated.\n\nSPON_ACT:\tSpontaneous Activity as defined by the number of interruptions \n\t\tof a photocell beam in a clear plastic cage over a 10 minute \n\t\tperiod of time.\n\nTEMP_B:\t\tRectal Temperature at baseline (just prior to treatment).\n\nTEMP_60:\tRectal Temperature at 60 minutes post treatment injection.\n\n\n", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch21a.dat", + "filename": "Drug-Interactions", + "name": "Drug Interactions Between Morphine and Marijuana", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch21b.dat contains the tail-flick data (510 observations of \n6 variables). Missing data are encoded with a period. \n\nVariable List:\n\nOBS:\t\tObservation identification number.\n\nREP:\t\tIdentification of study. Two 5x7 factorial experiments and one \n\t\t5x5 factorial experiment are included.\n\nMORPHINE:\tDose of morphine sulfate (mg/kg) injected into study mice. 
The \n\t\trange is 0 to 8.0.\n\nDEL9_THC:\tDose of Delta9-THC (mg/kg) injected into study mice. The \n\t\trange is from 0 to 15.0.\n\nFLICK_C:\tControl Flick Time. The number of seconds required for the \n\t\tmouse to flick its tail from beneath a heat stimulus prior to \n\t\ttreatment.\n\nFLICK_T:\tTest Flick Time. The number of seconds required for the \n\t\tmouse to flick its tail from beneath a heat stimulus post \n\t\ttreatment. A 10 sec maximum latency was imposed.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch21b.dat", + "filename": "Drug-Interactions-2", + "name": "Drug Interactions Between Morphine and Marijuana", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Other" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch20.dat contains the following variables:\n\nid subject identifier\nclinical indicator for selection into clinical sample:\n 1=in clinical sample; 0=not in clinical sample\nstratum stratum membership:\n 1=high screen; 2=low screen blacks;\n 3=low screen whites\nrace subject's self-reported race:\n 1=white; 2=black\ngender subject's gender:\n 1=male; 2=female\nrparents subject's guardian status:\n 1=does not live with both natural parents;\n 0=lives with both natural parents\ncesdtot subject's total center for epidemiologic studies depression\n scale score (range 0-60)\ncohtot subject's total cohesion score, based on faces-ii\n (range 16-80)\nmdd clinical diagnosis of major depression:\n 1=positive diagnosis; 0=negative diagnosis\n 9=missing for subjects not in clinical sample\nweight sampling weights used in logistic regression; defined as\n number of subjects in screening sample in each stratum", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch20.dat", + "filename": 
"Adolescent-Depression", + "name": "Two-Stage Sampling Designs for Adolescent Depression Studies", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Psychology" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch19a.dat", + "filename": "never-smokers", + "name": "never-smokers", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. 
\nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch19b.dat", + "filename": "current-smokers-m", + "name": "current smokers: male", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch19c.dat", + "filename": "current-smokers-f", + "name": "current smokers: female", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. 
\nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch19d.dat", + "filename": "former-smokers-mnc", + "name": "former smokers: male, no college", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. 
\nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch19e.dat", + "filename": "former-smokers-msc", + "name": "former smokers: male, some college", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. \nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch19f.dat", + "filename": "former-smokers-fnc", + "name": "former smokers: female, no college", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The ten variables listed in each file are: \n\nAge: Age on January 1, 1982. 
\nGender: 0=Male, 1=Female \nEducation: 0=no college, 1=some college \nSmoker: 1=never, 2=former, 3=current \nCigarettes/day: values rounded UP to the nearest 5.\nYears smoked: Number of years smoked as of January 1, 1982. \nYears quit: Number of years since smoking cessation, as of \n January 1, 1982 (zero indicates less than one year)\nFollowup Time: Years from January 1, 1982 until death or last\n interview.\nDeath codes: 0=alive, 1=death from other causes, 2=lung cancer death.\nFreq: the frequency at which each combination of variables occurred.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch19g.dat", + "filename": "former-smokers-fsc", + "name": "former smokers: female, some college", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Smoking" + } + ] + }, + { + "category_name": "Nature", + "subcategories": [ + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch3a.dat includes the validation data collected at the stationary\nambient monitoring site. The variables are:\n\n 1. Date, in MM/DD/YY format,\n DC\n 2. 12-hour average daytime continuous ozone concentration, X ,\n 1\n DP\n 3. 12-hour average daytime passive ozone concentration, X ,\n 1\n NC\n 4. 12-hour average nighttime continuous ozone concentration, X , and\n 1\n NP\n 5. 
12-hour average nighttime passive ozone concentration, X .\n 1", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch3a.dat", + "filename": "Ozone_", + "name": "Prediction Models for Personal Ozone Exposure Assessment", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch3b.dat includes the personal ozone exposure data. The \nvariables are:\n\n 1. Subject identification number, ranging from 1 to 23,\n\n 2. Date, in MM/DD/YY format,\n\n 3. Home region, ranging from 1 to 6,\n\n 4. 12-hour average daytime personal ozone concentration, Y,\n\n 5. 12-hour average daytime continuous ozone concentration at the\n DC\n stationary site, X ,\n 1\n\n 6. 12-hour average nighttime continuous ozone concentration at the\n NC\n stationary site, X ,\n 1\n O\n 7. 24-hour average home outdoor passive ozone concentration, X ,\n 1\n DI\n 8. 12-hour average home indoor daytime passive ozone concentration, X ,\n 1\n NI\n 9. 12-hour average home indoor nighttime passive ozone concentration, X ,\n 1\n\n 10. Prediction values for a 12-hour microenvironmental model based\n H\n on hourly ozone concentrations, X ,\n 2\n O\n 11. Fraction of time spent anywhere outdoors, X ,\n 3\n I\n 12. Fraction of time spent at home indoors, X , and\n 3\n\n 13. 
Indicator variable for whether the child stayed near the\n S\n home for the whole day, X , where 1 = yes, 0 = no.\n 3", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch3b.dat", + "filename": "Ozone2", + "name": "Prediction Models for Personal Ozone Exposure Assessment", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Weather" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch2.dat contains the following variables:\n\n animal - a unique identifier associated with each C. dubia tested\n conc - concentration (micro grams/L)\n brood1 - number of young produced in the first brood\n brood2 - number of young produced in the second brood\n brood3 - number of young produced in the third brood\n total - sum of young produced in the 3 broods (=brood1 + brood2 + brood3)", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch2.dat", + "filename": "Pollutants", + "name": "Assessing Toxicity of Pollutants in Aquatic Systems ", + "number_format": 31, + "remove_quotes": true, + "separator": "TAB", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "For a selection of months in the period 1970 to 1983, a measurement of the\nocean salinity at a depth of 100 meters off the Alaskan coast, given in parts\nper thousand. Columns are:\n\n 1. year\n 2. month\n 3. 
salinity", + "download": "http://lib.stat.cmu.edu/crab/salinity", + "filename": "salinity-2", + "name": "ocean salinity", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "For a selection of months in the period 1970 to 1983, a measurement of the\nocean temperature at a depth of 100 meters off the Alaskan coast, given in\ndegrees Celsius. Columns are:\n\n 1. year\n 2. month\n 3. temperature", + "download": "http://lib.stat.cmu.edu/crab/celsius", + "filename": "celsius", + "name": "ocean temperature", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Waters" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch4a.dat contains the burlap data, with the following variables:\n\n1. mburlap = mean burlap count value obtained over 12 subplot values.\n\n2. megg = mean egg mass density per acre obtained over 21 subplot values.\n\n3. seegg = estimated standard error of mean egg mass density obtained\nover 21 subplot values.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch4a.dat", + "filename": "Gypsy-Moth", + "name": "Measurement Error Models for Gypsy Moth Studies", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch4b.dat contains the defoliation data, with the following variables:\n\n1. 
mdef = mean defoliation value obtained from 20 subplot values.\n\n2. sedef = estimated standard error of mean defoliation\nobtained from 20 subplot values.\n\n3. megg = mean estimated egg mass density obtained over 20 subplots\n\n4. seegg = estimated standard error of mean egg \nmass density obtained from 20 subplot values.\n\n5. cdefegg = estimated covariance of mean defoliation and mean egg mass\ndensity obtained from 20 subplot values.\n", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch4b.dat", + "filename": "Gypsy-Moth2", + "name": "Measurement Error Models for Gypsy Moth Studies", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch7.dat contains the following variables:\n\nNo - observation number (1,...,294).\nTIME - survival time of halibut (time until death) in hours.\n (NOTE the Table 1 in the book claims survival time is in minutes,\n but HOURS is the correct unit)\nCENSOR - censoring indicator.
1=uncensored observation;\n 0=censored observation.\nTOWD - duration (in minutes) of time trawl net was towed on the bottom.\nDELDEPTH - difference between maximum and minimum depth observed during tow\n (depth measured in meters).\nLENGTH - fork length of halibut in centimeters.\nHANDTIME - handling time (in minutes) between net coming on board vessel \n and fish being placed in holding tanks.\nLOGCAT - natural logarithm of total catch of fish in tow.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch7.dat", + "filename": "Atlantic-Halibut", + "name": "Survival Analysis for Size Regulation of Atlantic Halibut", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch9.dat contains the following variables:\n\nBIRD : Bird id. \nRX : 1=NT, 2=PT, 3=FT, standing for \"No Tape\" (NT), in which no visible\n guides connected light cues\n with the feeders below them; \"Partial Tape\" (PT), in which fluorescent\n orange Dymo tape provided a discontinuous (i.e., broken in two places) \n connection between each light cue and its feeder; and \"Full Tape\"\n (FT), in which the visible guide between each light cue and\n its feeder (fluorescent orange Dymo tape) was continuous.\n Feeding continued for 180 trials.\nGENDER : 0=male, 1=female.
\nOUTCOME: 0=failure 1= success.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch9.dat", + "filename": "Hummingbirds", + "name": "Spatial Association Learning in Hummingbirds\n", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch10.dat contains eight variables, with 30 cases for each.\nEach case refers to a site in the forest. The first variable,\n'random', is a character variable indicating whether the site is a\nspotted owl nest site (=N) or a site selected at random\ncoordinates (=R). Variables 2-8 contain the percents of mature forest\n(>80 years of age). The variable names indicate the outer radii of the\nrings in which the percents were calculated. They are: 0.91km,\n1.18km, 1.40km, 1.60km, 1.77km, 2.41km, and 3.38km. So, for example,\nthe variable '1.18km' contains the percents of mature forest in\nrings with outer radius 1.18km and inner radius .91km centered at \nthe different sites.", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch10.dat", + "filename": "Habitat-Association", + "name": "Habitat Association Studies of the Northern Spotted\nOwl, Field Grouse, and Flammulated Owl\n", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch11a.dat contains a body temperature time series for an\nadult female beaver (Castor canadensis) obtained December 12-13, 1990 \nat Sandhill Wildlife Area, Wisconsin. Observations were made at 10\nminute intervals. 
These observations follow a random pattern of\nfluctuations, typically observed during freeze-up for all beaver in\nthis study. \n\nVariable List:\n\nObservation No.\nJulian day\nTime\nBody temperature (degrees C) \nActivity (0 = animal inside retreat; 1 = animal outside retreat) \n", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch11a.dat", + "filename": "Beaver-Body-Temperatures", + "name": "Time-Series Analyses of Beaver Body Temperatures", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch11b.dat contains a body temperature time series for\na subadult female beaver (Castor\ncanadensis). Observations were made at Sandhill Wildlife Area,\nWisconsin, November 3-4, 1990 (before freeze-up). Temperature\nobservations follow a plateau pattern, typically observed during\nthe entire ice-free period (late spring to late autumn). Only the\nfirst 100 observations are included in this data set.\n\nVariable list:\n\nObservation number\nJulian day\nTime\nBody temperature (degrees C)\nActivity (0 = animal inside retreat; 1 = animal outside retreat)", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch11b.dat", + "filename": "Beaver-Body-Temperatures2", + "name": "\nTime-Series Analyses of Beaver Body Temperatures\n", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The main data set consists of king crab pot survey data for the years 1973\nthrough 1986.
The surveys were conducted in the waters around Kodiak Island,\nAlaska, using pots similar to the pots used by the commercial fishing fleet.\n(A crab pot is a trap that resembles a wooden crate.) A fixed sampling grid\nwas used to place strings of pots (stations) consisting usually of 10 pots in\nopen ocean, or of 2-5 pots in bays. The pots were left in the water for\nperiods of 16-24 hours, removed, and the crab counts recorded. The survey was\nconducted each summer, 2-4 weeks prior to start of the commercial fishing\nseason. The crab counts are classified by size (roughly representing age) and\nsex into six categories.\n\nThe basic survey data is a file \"survey\", containing a 3,450 by 14 matrix\nwith these columns:\n\n 1. Year (last two digits)\n 2. Fishing district (one of four)\n 3. Station identifier (alphabetic)\n 4. The number of pots fished\n 5-6. Latitude and longitude of the location halfway between\n the first and last pot of the station\n 7. Number of pre-recruit-4 crab\n 8. Number of pre-recruit-3 crab\n 9. Number of pre-recruit-2 crab\n 10. Number of pre-recruit-1 crab\n 11. Number of recruit males\n 12. Number of post-recruit males\n 13. Number of juvenile females\n 14. Number of adult females", + "download": "http://lib.stat.cmu.edu/crab/survey", + "filename": "survey_", + "name": "Survey", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "==================== Contents of file \"dstns\" ============================\n \nFor each of the years in the survey (1973 to 1986), a frequency distribution\nof the crab by size (in 1 mm increments) that were surveyed. Separate\ndistributions are given for juvenile females, adult females, and all males.\nThe five columns are:\n\n 1. year\n 2. length in mm\n 3. 
count of juvenile females\n 4. count of adult females\n 5. count of all males", + "download": "http://lib.stat.cmu.edu/crab/dstns", + "filename": "dstns", + "name": "dstns", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "For each of the 14 years in the survey (1973-86), an estimate of the number of\neggs per female. Columns are:\n\n 1. year\n 2. estimated eggs per adult female", + "download": "http://lib.stat.cmu.edu/crab/eggs", + "filename": "eggs", + "name": "eggs per female", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "For each year in the survey, a frequency distribution of all females\ncross-classified by size (in 1 mm increments) and percent clutch fullness (5\ncategories). Clutch fullness is, roughly, the realized egg-bearing potential\nof a female crab. The seven columns are:\n\n 1. year\n 2. size, in mm\n 3. count of females with 0% fullness\n 4. count of females with 1-29% fullness\n 5. count of females with 30-59% fullness\n 6. count of females with 60-89% fullness\n 7. 
count of females with 90-100% fullness", + "download": "http://lib.stat.cmu.edu/crab/fullness", + "filename": "fullness", + "name": "Clutch fullness", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Animals" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "John O. Rawlings and Susan E. Spruill\n\nThe data set ch5.dat contains the following variables:\n\n1. site: coded 1-6 corresponding to the location code used in Table 1.\n2. block: block within site coded 1, 2, ... within sites for the RCB designs;\n block=1 for all observations for the CRD designs, sites 5 and 6.\n3. rep: replication within site coded as missing in sites 1-4;\n coded as 1, 2, ... for replicates in the CRD design.\n4. ozone: target ozone treatment, coded 0.0=charcoal filtered air, \n 1.0=nonfiltered air, \"x.x\"=target level of ozone as multiple of \n ambient ozone level.\n5. rain: acidic rain treatment, coded as pH of rain solution.\n6. fam: genetic family, coded as 1, 2, ... within sites.\n7. ppmhrs: cumulative ozone exposure (ppm-h) during the two years of\n the trials.\n8. vwpH: cumulative exposure to acidic rain computed as vwpH \n = -log(sum(volume*hydrogen ion concentration)).\n9. biomass: total above ground biomass (g) after two growing seasons.\n10. diam: increment of diameter growth (mm) during the two growing seasons.\n11. DMA: whole-plot component of the covariate initial diameter (mm)\n expressed as the deviation of the whole-plot mean from the overall\n site mean.\n12. DMB: sub-plot component of the covariate initial diameter (mm)\n expressed as the deviation of the subplot mean from the whole-plot mean.\n13. 
D2HA: whole-plot component of the covariate initial volume, \n approximated as diameter squared times height, and expressed as\n the deviation of the whole-plot mean from the overall site mean.\n14. D2HB: sub-plot component of the covariate initial volume and\n expressed as the deviation of the subplot mean from the whole-plot mean.\n15. DMOT: depth to mottling (cm) of the clay soil; one measurement\n per whole-plot. \n\nMissing data are coded with '.'", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch5.dat", + "filename": "Pine-Seedling", + "name": "Estimating Pine Seedling Response to Ozone and Acid Rain", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch8.dat contains the following variables:\n\nPop - population code, 1034 or 1040\nADH - 1 (cepa), 2 (het) or 3 (fist)\nIDH - 1 (cepa), 2 (het) or 3 (fist)\nPGI - 1 (cepa), 2 (het) or 3 (fist)\nfreq - frequency", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch8.dat", + "filename": "Hybrid-Onions", + "name": "\nMixture Fraction and Linkage Analyses for Hybrid Onions", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Plants" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch6.dat contains the following variables. 
\n\n\nSTRATA : National Marine Fisheries Service (NMFS) 4 digit strata\n designator in which the sample was taken \n \nSAMPLE : Sample number per year ranging from 1 to approximately 450\n\nLAT : Location in terms of latitude of each sample in the Atlantic Ocean \n\nLONG : Location in terms of longitude of each sample in the Atlantic Ocean\n \nTCATCH : Total number of scallops caught at the ith sample location\n\nPREREC : Number of scallops whose shell length is smaller than 70 millimeters\n \nRECRUITS : Number of scallops whose shell length is 70 millimeters or larger", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch6.dat", + "filename": "Scallop-Abundance", + "name": "Geostatistical Estimates of Scallop Abundance", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Geology" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Geographical coordinates of the shoreline of the 17 islands that form the\nKodiak Island group. The two columns are\n\n 1. latitude\n 2. longitude\n\nmeasured in degrees and fractions of a degree. Each of the 17 groups of\ncoordinates is terminated by a pair of \"NA\"s, and the end of each group loops\nback to the beginning. For drawing maps, bear in mind that longitude is\nmeasured East to West, which is right to left. This suggests plotting\nnegative longitude instead of longitude. 
Also, to draw maps that \"look right\"\nto an Alaskan, you must take into account that in this part of the world the\naspect ratio of one degree latitude (y-axis) to one degree longitude (x-axis)\nis 1:1.8 (in terms of actual ground distance).", + "download": "http://lib.stat.cmu.edu/crab/kodiak", + "filename": "kodiak", + "name": "Geographical coordinates of the shoreline of Kodiak Island group", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Other" + } + ] + }, + { + "category_name": "Statistics", + "subcategories": [ + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Some statistics on the fishing fleet and commercial catch, for each year\nbetween 1960 and 1982. The six columns are:\n\n 1. year\n 2. number of vessels registered for fishing\n 3. number of crab caught\n 4. total weight in kilograms of crab caught\n 5. total number of pot-lifts.\n 6. wholesale price of king crab in dollars per pound", + "download": "http://lib.stat.cmu.edu/crab/fleet", + "filename": "fleet", + "name": "fishing fleet and commercial catch", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "Commercial catch data for 1960-1982, broken out by district. The four columns\nare:\n\n 1. year\n 2. district number (1, 2, 3 or 4)\n 3. total catch as a count\n 4. 
total catch in kilograms", + "download": "http://lib.stat.cmu.edu/crab/catch", + "filename": "catch", + "name": "Commercial catch data", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Economics" + }, + { + "datasets": [ + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "The file ch12.dat contains the following variables:\n\nlstay: Length of stay of a resident\nage: Age of a resident\ntrt: Nursing home assignment (1: receive treatment,0: control)\ngender: Gender (1:male,0:female)\nmarstat: Marital status (1: married,0: not married)\nhlstat: Health status (2: second best, 5: worst)\ncens: Censoring indicator (1:censored, 0: discharged)", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch12.dat", + "filename": "Nursing-Home-Usage", + "name": "\nParametric Duration Analysis of Nursing Home Usage", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + }, + { + "DateTime_format": "yyyy-MM-dd", + "comment_character": "#", + "create_index_column": false, + "description": "This data set was derived from sample survey data collected in 1988\nin two surveys designed to evaluate the City of Toronto Workplace\nSmoking By-law (National Health Research and Development Program,\nCanada, Project Grant 6606-3346-46). The principal investigator\nwas Dr. L.L. Pederson, University of Western Ontario, Ontario,\nCanada. The surveys were conducted by the Institute for Social\nResearch at York University, Ontario, Canada in January-February\n1988 and in November-December 1988. By agreement with the\nInstitute for Social Research, York University, the survey data are\nin the public domain.
This data set can be used freely for\nnoncommercial purposes and can be freely distributed.\n\nThere are 15 variables in the data set, with values separated by\nblanks. There are no missing values. The CSB variable names are as\nfollows: \n\nidno y w x1 x2 x3 z1 z2 z3 z4 z5 z6 z7 z8 z9\n\n\nSHORT DESCRIPTION NAME DEFINITION AND CODING\n\nUnique identifier idno (5 digits, beginning with 1 or 2)\n\nOutcome y Attitude toward smoking in the\n workplace. Smoking should be: \n (1 = prohibited, 2 = restricted,\n 0 = unrestricted)\n\nWeight w Sampling/post-stratification weight\n (ranges from 0.305 to 4.494)\n\nTime x1 Time of survey relative to\n implementation of the by-law \n on March 1, 1988\n (1 = post, 0 = pre)\n\nWork x2 Place of work indicator 1\n with City of Toronto as baseline\n (1 = outside City of Toronto,\n 0 = otherwise)\n\n x3 Place of work indicator 2\n with City of Toronto as baseline\n (1 = not outside the home, \n 0 = otherwise)\n\nResidence z1 Place of residence\n (1 = City of Toronto, \n 0 = other Metro Toronto)\n\nSmoking z2 Smoking status indicator 1\n with those who have never smoked \n as the baseline\n (1 = current smoker, \n 0 = otherwise)\n\n z3 Smoking status indicator 2\n with never as the baseline\n (1 = quit <=6 months ago, \n 0 = otherwise)\n\n z4 Smoking status indicator 3\n with never as the baseline\n (1 = quit >6 months ago, \n 0 = otherwise)\n\n z5 Smoking status indicator 4\n with quit >12 months as the baseline\n (1 = quit 6-12 months, \n 0 = otherwise)\n\nKnowledge z6 Knowledge of health effects of\n environmental tobacco smoke\n (score, ranges from 0 to 12)\n\nSex z7 Sex of respondent\n (1 = male, 0 = female)\nAge z8 Age of respondent\n ( (age in years - 50)/10 )\n\nEducation z9 Level of education\n (-2 = elementary, \n -1 = some high school, \n 0 = high or trade school, \n 1 = college or some university,\n 2 = university degree)\n ", + "download": "http://lib.stat.cmu.edu/datasets/csb/ch13.dat", + "filename": 
"Smoking-Restrictions", + "name": "Analysis of Attitudes Towards Workplace Smoking Restrictions", + "number_format": 31, + "remove_quotes": true, + "separator": "SPACE", + "simplify_whitespaces": false, + "skip_empty_parts": true, + "use_first_row_for_vectorname": false + } + ], + "subcategory_name": "Population" + } + ] + } + ], + "collection_name": "StatLib" +}